From 4baaa8c3d891b57036fd2a7c6a890737793fe3a0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 7 Dec 2016 16:08:27 +0100 Subject: [PATCH 01/14] qemu-img: fix in-flight count for qemu-img bench With aio=native (qemu-img bench -n) one or more requests can be completed when a new request is submitted. This in turn can cause bench_cb to recurse before b->in_flight is updated. This causes multiple I/Os to be submitted with the same offset and, furthermore, the blk_aio_* coroutines are never freed and qemu-img aborts. Signed-off-by: Paolo Bonzini Reviewed-by: John Snow Signed-off-by: Kevin Wolf --- qemu-img.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 6949b73ca5..5df66fe661 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -3559,20 +3559,23 @@ static void bench_cb(void *opaque, int ret) } while (b->n > b->in_flight && b->in_flight < b->nrreq) { + int64_t offset = b->offset; + /* blk_aio_* might look for completed I/Os and kick bench_cb + * again, so make sure this operation is counted by in_flight + * and b->offset is ready for the next submission. 
+ */ + b->in_flight++; + b->offset += b->step; + b->offset %= b->image_size; if (b->write) { - acb = blk_aio_pwritev(b->blk, b->offset, b->qiov, 0, - bench_cb, b); + acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b); } else { - acb = blk_aio_preadv(b->blk, b->offset, b->qiov, 0, - bench_cb, b); + acb = blk_aio_preadv(b->blk, offset, b->qiov, 0, bench_cb, b); } if (!acb) { error_report("Failed to issue request"); exit(EXIT_FAILURE); } - b->in_flight++; - b->offset += b->step; - b->offset %= b->image_size; } } From 536fca7f7ea4913f71df7f420953e97619be74e1 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 7 Nov 2016 16:34:35 +0100 Subject: [PATCH 02/14] coroutine: Introduce qemu_coroutine_enter_if_inactive() In the context of asynchronous work, if we have a worker coroutine that didn't yield, the parent coroutine cannot be reentered because it hasn't yielded yet. In this case we don't even have to reenter the parent because it will see that the work is already done and won't even yield. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia --- include/qemu/coroutine.h | 6 ++++++ util/qemu-coroutine.c | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h index e6a60d55fd..12584ed1b7 100644 --- a/include/qemu/coroutine.h +++ b/include/qemu/coroutine.h @@ -70,6 +70,12 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque); */ void qemu_coroutine_enter(Coroutine *coroutine); +/** + * Transfer control to a coroutine if it's not active (i.e. part of the call + * stack of the running coroutine). Otherwise, do nothing. 
+ */ +void qemu_coroutine_enter_if_inactive(Coroutine *co); + /** * Transfer control back to a coroutine's caller * diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index 737bffa984..a5d2f6c0c3 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -131,6 +131,13 @@ void qemu_coroutine_enter(Coroutine *co) } } +void qemu_coroutine_enter_if_inactive(Coroutine *co) +{ + if (!qemu_coroutine_entered(co)) { + qemu_coroutine_enter(co); + } +} + void coroutine_fn qemu_coroutine_yield(void) { Coroutine *self = qemu_coroutine_self(); From 10c855196837059b5d988557183c8f8392033e52 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 7 Nov 2016 18:00:29 +0100 Subject: [PATCH 03/14] quorum: Remove s from quorum_aio_get() arguments There is no point in passing the value of bs->opaque in order to overwrite it with itself. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Paolo Bonzini Reviewed-by: Alberto Garcia --- block/quorum.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index d122299352..dfa9fd3fe5 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -171,18 +171,17 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b) return a->l == b->l; } -static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, - BlockDriverState *bs, +static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, QEMUIOVector *qiov, uint64_t sector_num, int nb_sectors, BlockCompletionFunc *cb, void *opaque) { + BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque); int i; - acb->common.bs->opaque = s; acb->sector_num = sector_num; acb->nb_sectors = nb_sectors; acb->qiov = qiov; @@ -691,7 +690,7 @@ static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs, void *opaque) { BDRVQuorumState *s = bs->opaque; - QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, + QuorumAIOCB *acb = quorum_aio_get(bs, qiov, sector_num, nb_sectors, cb, opaque); 
acb->is_read = true; acb->children_read = 0; @@ -711,7 +710,7 @@ static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs, void *opaque) { BDRVQuorumState *s = bs->opaque; - QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors, + QuorumAIOCB *acb = quorum_aio_get(bs, qiov, sector_num, nb_sectors, cb, opaque); int i; From ce15dc08ef13438ba7be75e6887162ad2cc5c6c9 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 8 Nov 2016 11:10:14 +0100 Subject: [PATCH 04/14] quorum: Implement .bdrv_co_readv/writev This converts the quorum block driver from implementing callback-based interfaces for read/write to coroutine-based ones. This is the first step that will allow us further simplification of the code. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia --- block/quorum.c | 194 +++++++++++++++++++++++++++++-------------------- 1 file changed, 116 insertions(+), 78 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index dfa9fd3fe5..6a7bd9199b 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -97,7 +97,7 @@ typedef struct QuorumAIOCB QuorumAIOCB; * $children_count QuorumChildRequest. */ typedef struct QuorumChildRequest { - BlockAIOCB *aiocb; + BlockDriverState *bs; QEMUIOVector qiov; uint8_t *buf; int ret; @@ -110,7 +110,8 @@ typedef struct QuorumChildRequest { * used to do operations on each children and track overall progress. 
*/ struct QuorumAIOCB { - BlockAIOCB common; + BlockDriverState *bs; + Coroutine *co; /* Request metadata */ uint64_t sector_num; @@ -129,36 +130,23 @@ struct QuorumAIOCB { QuorumVotes votes; bool is_read; + bool has_completed; int vote_ret; int children_read; /* how many children have been read from */ }; +typedef struct QuorumCo { + QuorumAIOCB *acb; + int idx; +} QuorumCo; + static bool quorum_vote(QuorumAIOCB *acb); -static void quorum_aio_cancel(BlockAIOCB *blockacb) -{ - QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common); - BDRVQuorumState *s = acb->common.bs->opaque; - int i; - - /* cancel all callbacks */ - for (i = 0; i < s->num_children; i++) { - if (acb->qcrs[i].aiocb) { - bdrv_aio_cancel_async(acb->qcrs[i].aiocb); - } - } -} - -static AIOCBInfo quorum_aiocb_info = { - .aiocb_size = sizeof(QuorumAIOCB), - .cancel_async = quorum_aio_cancel, -}; - static void quorum_aio_finalize(QuorumAIOCB *acb) { - acb->common.cb(acb->common.opaque, acb->vote_ret); + acb->has_completed = true; g_free(acb->qcrs); - qemu_aio_unref(acb); + qemu_coroutine_enter_if_inactive(acb->co); } static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b) @@ -174,14 +162,14 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b) static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, QEMUIOVector *qiov, uint64_t sector_num, - int nb_sectors, - BlockCompletionFunc *cb, - void *opaque) + int nb_sectors) { BDRVQuorumState *s = bs->opaque; - QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque); + QuorumAIOCB *acb = g_new(QuorumAIOCB, 1); int i; + acb->co = qemu_coroutine_self(); + acb->bs = bs; acb->sector_num = sector_num; acb->nb_sectors = nb_sectors; acb->qiov = qiov; @@ -191,6 +179,7 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, acb->rewrite_count = 0; acb->votes.compare = quorum_sha256_compare; QLIST_INIT(&acb->votes.vote_list); + acb->has_completed = false; acb->is_read = false; acb->vote_ret = 0; @@ -217,7 
+206,7 @@ static void quorum_report_bad(QuorumOpType type, uint64_t sector_num, static void quorum_report_failure(QuorumAIOCB *acb) { - const char *reference = bdrv_get_device_or_node_name(acb->common.bs); + const char *reference = bdrv_get_device_or_node_name(acb->bs); qapi_event_send_quorum_failure(reference, acb->sector_num, acb->nb_sectors, &error_abort); } @@ -226,7 +215,7 @@ static int quorum_vote_error(QuorumAIOCB *acb); static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb) { - BDRVQuorumState *s = acb->common.bs->opaque; + BDRVQuorumState *s = acb->bs->opaque; if (acb->success_count < s->threshold) { acb->vote_ret = quorum_vote_error(acb); @@ -252,7 +241,7 @@ static void quorum_rewrite_aio_cb(void *opaque, int ret) quorum_aio_finalize(acb); } -static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb); +static int read_fifo_child(QuorumAIOCB *acb); static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) { @@ -272,14 +261,14 @@ static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret) QuorumAIOCB *acb = sacb->parent; QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; quorum_report_bad(type, acb->sector_num, acb->nb_sectors, - sacb->aiocb->bs->node_name, ret); + sacb->bs->node_name, ret); } -static void quorum_fifo_aio_cb(void *opaque, int ret) +static int quorum_fifo_aio_cb(void *opaque, int ret) { QuorumChildRequest *sacb = opaque; QuorumAIOCB *acb = sacb->parent; - BDRVQuorumState *s = acb->common.bs->opaque; + BDRVQuorumState *s = acb->bs->opaque; assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO); @@ -288,8 +277,7 @@ static void quorum_fifo_aio_cb(void *opaque, int ret) /* We try to read next child in FIFO order if we fail to read */ if (acb->children_read < s->num_children) { - read_fifo_child(acb); - return; + return read_fifo_child(acb); } } @@ -297,13 +285,14 @@ static void quorum_fifo_aio_cb(void *opaque, int ret) /* FIXME: rewrite failed children if acb->children_read > 1? 
*/ quorum_aio_finalize(acb); + return ret; } static void quorum_aio_cb(void *opaque, int ret) { QuorumChildRequest *sacb = opaque; QuorumAIOCB *acb = sacb->parent; - BDRVQuorumState *s = acb->common.bs->opaque; + BDRVQuorumState *s = acb->bs->opaque; bool rewrite = false; int i; @@ -518,7 +507,7 @@ static bool quorum_compare(QuorumAIOCB *acb, QEMUIOVector *a, QEMUIOVector *b) { - BDRVQuorumState *s = acb->common.bs->opaque; + BDRVQuorumState *s = acb->bs->opaque; ssize_t offset; /* This driver will replace blkverify in this particular case */ @@ -538,7 +527,7 @@ static bool quorum_compare(QuorumAIOCB *acb, /* Do a vote to get the error code */ static int quorum_vote_error(QuorumAIOCB *acb) { - BDRVQuorumState *s = acb->common.bs->opaque; + BDRVQuorumState *s = acb->bs->opaque; QuorumVoteVersion *winner = NULL; QuorumVotes error_votes; QuorumVoteValue result_value; @@ -573,7 +562,7 @@ static bool quorum_vote(QuorumAIOCB *acb) bool rewrite = false; int i, j, ret; QuorumVoteValue hash; - BDRVQuorumState *s = acb->common.bs->opaque; + BDRVQuorumState *s = acb->bs->opaque; QuorumVoteVersion *winner; if (quorum_has_too_much_io_failed(acb)) { @@ -649,10 +638,25 @@ free_exit: return rewrite; } -static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) +static void read_quorum_children_entry(void *opaque) { - BDRVQuorumState *s = acb->common.bs->opaque; - int i; + QuorumCo *co = opaque; + QuorumAIOCB *acb = co->acb; + BDRVQuorumState *s = acb->bs->opaque; + int i = co->idx; + int ret; + + acb->qcrs[i].bs = s->children[i]->bs; + ret = bdrv_co_preadv(s->children[i], acb->sector_num * BDRV_SECTOR_SIZE, + acb->nb_sectors * BDRV_SECTOR_SIZE, + &acb->qcrs[i].qiov, 0); + quorum_aio_cb(&acb->qcrs[i], ret); +} + +static int read_quorum_children(QuorumAIOCB *acb) +{ + BDRVQuorumState *s = acb->bs->opaque; + int i, ret; acb->children_read = s->num_children; for (i = 0; i < s->num_children; i++) { @@ -662,65 +666,99 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) } for (i 
= 0; i < s->num_children; i++) { - acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num, - &acb->qcrs[i].qiov, acb->nb_sectors, - quorum_aio_cb, &acb->qcrs[i]); + Coroutine *co; + QuorumCo data = { + .acb = acb, + .idx = i, + }; + + co = qemu_coroutine_create(read_quorum_children_entry, &data); + qemu_coroutine_enter(co); } - return &acb->common; + if (!acb->has_completed) { + qemu_coroutine_yield(); + } + + ret = acb->vote_ret; + + return ret; } -static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb) +static int read_fifo_child(QuorumAIOCB *acb) { - BDRVQuorumState *s = acb->common.bs->opaque; + BDRVQuorumState *s = acb->bs->opaque; int n = acb->children_read++; + int ret; - acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num, - acb->qiov, acb->nb_sectors, - quorum_fifo_aio_cb, &acb->qcrs[n]); + acb->qcrs[n].bs = s->children[n]->bs; + ret = bdrv_co_preadv(s->children[n], acb->sector_num * BDRV_SECTOR_SIZE, + acb->nb_sectors * BDRV_SECTOR_SIZE, acb->qiov, 0); + ret = quorum_fifo_aio_cb(&acb->qcrs[n], ret); - return &acb->common; + return ret; } -static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, - int nb_sectors, - BlockCompletionFunc *cb, - void *opaque) +static int quorum_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov) { BDRVQuorumState *s = bs->opaque; - QuorumAIOCB *acb = quorum_aio_get(bs, qiov, sector_num, - nb_sectors, cb, opaque); + QuorumAIOCB *acb = quorum_aio_get(bs, qiov, sector_num, nb_sectors); + int ret; + acb->is_read = true; acb->children_read = 0; if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { - return read_quorum_children(acb); + ret = read_quorum_children(acb); + } else { + ret = read_fifo_child(acb); } - - return read_fifo_child(acb); + g_free(acb); + return ret; } -static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, - int nb_sectors, - BlockCompletionFunc *cb, - void 
*opaque) +static void write_quorum_entry(void *opaque) +{ + QuorumCo *co = opaque; + QuorumAIOCB *acb = co->acb; + BDRVQuorumState *s = acb->bs->opaque; + int i = co->idx; + int ret; + + acb->qcrs[i].bs = s->children[i]->bs; + ret = bdrv_co_pwritev(s->children[i], acb->sector_num * BDRV_SECTOR_SIZE, + acb->nb_sectors * BDRV_SECTOR_SIZE, acb->qiov, 0); + quorum_aio_cb(&acb->qcrs[i], ret); +} + +static int quorum_co_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov) { BDRVQuorumState *s = bs->opaque; - QuorumAIOCB *acb = quorum_aio_get(bs, qiov, sector_num, nb_sectors, - cb, opaque); - int i; + QuorumAIOCB *acb = quorum_aio_get(bs, qiov, sector_num, nb_sectors); + int i, ret; for (i = 0; i < s->num_children; i++) { - acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num, - qiov, nb_sectors, &quorum_aio_cb, - &acb->qcrs[i]); + Coroutine *co; + QuorumCo data = { + .acb = acb, + .idx = i, + }; + + co = qemu_coroutine_create(write_quorum_entry, &data); + qemu_coroutine_enter(co); } - return &acb->common; + if (!acb->has_completed) { + qemu_coroutine_yield(); + } + + ret = acb->vote_ret; + + return ret; } static int64_t quorum_getlength(BlockDriverState *bs) @@ -1097,8 +1135,8 @@ static BlockDriver bdrv_quorum = { .bdrv_getlength = quorum_getlength, - .bdrv_aio_readv = quorum_aio_readv, - .bdrv_aio_writev = quorum_aio_writev, + .bdrv_co_readv = quorum_co_readv, + .bdrv_co_writev = quorum_co_writev, .bdrv_add_child = quorum_add_child, .bdrv_del_child = quorum_del_child, From 0f31977d9dd659ae39aa42e3904c5c1b23da1475 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 10 Nov 2016 14:24:27 +0100 Subject: [PATCH 05/14] quorum: Do cleanup in caller coroutine Instead of calling quorum_aio_finalize() deeply nested in what used to be an AIO callback, do it in the same functions that allocated the AIOCB. 
Signed-off-by: Kevin Wolf Reviewed-by: Alberto Garcia Reviewed-by: Eric Blake --- block/quorum.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index 6a7bd9199b..e044010717 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -144,9 +144,8 @@ static bool quorum_vote(QuorumAIOCB *acb); static void quorum_aio_finalize(QuorumAIOCB *acb) { - acb->has_completed = true; g_free(acb->qcrs); - qemu_coroutine_enter_if_inactive(acb->co); + g_free(acb); } static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b) @@ -238,7 +237,8 @@ static void quorum_rewrite_aio_cb(void *opaque, int ret) return; } - quorum_aio_finalize(acb); + acb->has_completed = true; + qemu_coroutine_enter_if_inactive(acb->co); } static int read_fifo_child(QuorumAIOCB *acb); @@ -284,7 +284,7 @@ static int quorum_fifo_aio_cb(void *opaque, int ret) acb->vote_ret = ret; /* FIXME: rewrite failed children if acb->children_read > 1? */ - quorum_aio_finalize(acb); + return ret; } @@ -322,7 +322,8 @@ static void quorum_aio_cb(void *opaque, int ret) /* if no rewrite is done the code will finish right away */ if (!rewrite) { - quorum_aio_finalize(acb); + acb->has_completed = true; + qemu_coroutine_enter_if_inactive(acb->co); } } @@ -715,7 +716,8 @@ static int quorum_co_readv(BlockDriverState *bs, } else { ret = read_fifo_child(acb); } - g_free(acb); + quorum_aio_finalize(acb); + return ret; } @@ -757,6 +759,7 @@ static int quorum_co_writev(BlockDriverState *bs, } ret = acb->vote_ret; + quorum_aio_finalize(acb); return ret; } From 7cd9b3964e2e8056c5863b6d9fd376bc0c1fcdf6 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 10 Nov 2016 16:13:15 +0100 Subject: [PATCH 06/14] quorum: Inline quorum_aio_cb() This is a conversion to a more natural coroutine style and improves the readability of the driver. 
Signed-off-by: Kevin Wolf Reviewed-by: Alberto Garcia Reviewed-by: Eric Blake --- block/quorum.c | 128 +++++++++++++++++++++++-------------------------- 1 file changed, 59 insertions(+), 69 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index e044010717..2c280bbef7 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -130,7 +130,6 @@ struct QuorumAIOCB { QuorumVotes votes; bool is_read; - bool has_completed; int vote_ret; int children_read; /* how many children have been read from */ }; @@ -140,8 +139,6 @@ typedef struct QuorumCo { int idx; } QuorumCo; -static bool quorum_vote(QuorumAIOCB *acb); - static void quorum_aio_finalize(QuorumAIOCB *acb) { g_free(acb->qcrs); @@ -178,7 +175,6 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, acb->rewrite_count = 0; acb->votes.compare = quorum_sha256_compare; QLIST_INIT(&acb->votes.vote_list); - acb->has_completed = false; acb->is_read = false; acb->vote_ret = 0; @@ -231,13 +227,6 @@ static void quorum_rewrite_aio_cb(void *opaque, int ret) /* one less rewrite to do */ acb->rewrite_count--; - - /* wait until all rewrite callbacks have completed */ - if (acb->rewrite_count) { - return; - } - - acb->has_completed = true; qemu_coroutine_enter_if_inactive(acb->co); } @@ -288,45 +277,6 @@ static int quorum_fifo_aio_cb(void *opaque, int ret) return ret; } -static void quorum_aio_cb(void *opaque, int ret) -{ - QuorumChildRequest *sacb = opaque; - QuorumAIOCB *acb = sacb->parent; - BDRVQuorumState *s = acb->bs->opaque; - bool rewrite = false; - int i; - - sacb->ret = ret; - if (ret == 0) { - acb->success_count++; - } else { - quorum_report_bad_acb(sacb, ret); - } - acb->count++; - assert(acb->count <= s->num_children); - assert(acb->success_count <= s->num_children); - if (acb->count < s->num_children) { - return; - } - - /* Do the vote on read */ - if (acb->is_read) { - rewrite = quorum_vote(acb); - for (i = 0; i < s->num_children; i++) { - qemu_vfree(acb->qcrs[i].buf); - qemu_iovec_destroy(&acb->qcrs[i].qiov); - 
} - } else { - quorum_has_too_much_io_failed(acb); - } - - /* if no rewrite is done the code will finish right away */ - if (!rewrite) { - acb->has_completed = true; - qemu_coroutine_enter_if_inactive(acb->co); - } -} - static void quorum_report_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb, QuorumVoteValue *value) @@ -557,17 +507,16 @@ static int quorum_vote_error(QuorumAIOCB *acb) return ret; } -static bool quorum_vote(QuorumAIOCB *acb) +static void quorum_vote(QuorumAIOCB *acb) { bool quorum = true; - bool rewrite = false; int i, j, ret; QuorumVoteValue hash; BDRVQuorumState *s = acb->bs->opaque; QuorumVoteVersion *winner; if (quorum_has_too_much_io_failed(acb)) { - return false; + return; } /* get the index of the first successful read */ @@ -595,7 +544,7 @@ static bool quorum_vote(QuorumAIOCB *acb) /* Every successful read agrees */ if (quorum) { quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov); - return false; + return; } /* compute hashes for each successful read, also store indexes */ @@ -630,13 +579,12 @@ static bool quorum_vote(QuorumAIOCB *acb) /* corruption correction is enabled */ if (s->rewrite_corrupted) { - rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value); + quorum_rewrite_bad_versions(s, acb, &winner->value); } free_exit: /* free lists */ quorum_free_vote_list(&acb->votes); - return rewrite; } static void read_quorum_children_entry(void *opaque) @@ -645,13 +593,28 @@ static void read_quorum_children_entry(void *opaque) QuorumAIOCB *acb = co->acb; BDRVQuorumState *s = acb->bs->opaque; int i = co->idx; - int ret; + QuorumChildRequest *sacb = &acb->qcrs[i]; - acb->qcrs[i].bs = s->children[i]->bs; - ret = bdrv_co_preadv(s->children[i], acb->sector_num * BDRV_SECTOR_SIZE, - acb->nb_sectors * BDRV_SECTOR_SIZE, - &acb->qcrs[i].qiov, 0); - quorum_aio_cb(&acb->qcrs[i], ret); + sacb->bs = s->children[i]->bs; + sacb->ret = bdrv_co_preadv(s->children[i], + acb->sector_num * BDRV_SECTOR_SIZE, + acb->nb_sectors * BDRV_SECTOR_SIZE, + 
&acb->qcrs[i].qiov, 0); + + if (sacb->ret == 0) { + acb->success_count++; + } else { + quorum_report_bad_acb(sacb, sacb->ret); + } + + acb->count++; + assert(acb->count <= s->num_children); + assert(acb->success_count <= s->num_children); + + /* Wake up the caller after the last read */ + if (acb->count == s->num_children) { + qemu_coroutine_enter_if_inactive(acb->co); + } } static int read_quorum_children(QuorumAIOCB *acb) @@ -677,7 +640,18 @@ static int read_quorum_children(QuorumAIOCB *acb) qemu_coroutine_enter(co); } - if (!acb->has_completed) { + while (acb->count < s->num_children) { + qemu_coroutine_yield(); + } + + /* Do the vote on read */ + quorum_vote(acb); + for (i = 0; i < s->num_children; i++) { + qemu_vfree(acb->qcrs[i].buf); + qemu_iovec_destroy(&acb->qcrs[i].qiov); + } + + while (acb->rewrite_count) { qemu_coroutine_yield(); } @@ -727,12 +701,26 @@ static void write_quorum_entry(void *opaque) QuorumAIOCB *acb = co->acb; BDRVQuorumState *s = acb->bs->opaque; int i = co->idx; - int ret; + QuorumChildRequest *sacb = &acb->qcrs[i]; - acb->qcrs[i].bs = s->children[i]->bs; - ret = bdrv_co_pwritev(s->children[i], acb->sector_num * BDRV_SECTOR_SIZE, - acb->nb_sectors * BDRV_SECTOR_SIZE, acb->qiov, 0); - quorum_aio_cb(&acb->qcrs[i], ret); + sacb->bs = s->children[i]->bs; + sacb->ret = bdrv_co_pwritev(s->children[i], + acb->sector_num * BDRV_SECTOR_SIZE, + acb->nb_sectors * BDRV_SECTOR_SIZE, + acb->qiov, 0); + if (sacb->ret == 0) { + acb->success_count++; + } else { + quorum_report_bad_acb(sacb, sacb->ret); + } + acb->count++; + assert(acb->count <= s->num_children); + assert(acb->success_count <= s->num_children); + + /* Wake up the caller after the last write */ + if (acb->count == s->num_children) { + qemu_coroutine_enter_if_inactive(acb->co); + } } static int quorum_co_writev(BlockDriverState *bs, @@ -754,10 +742,12 @@ static int quorum_co_writev(BlockDriverState *bs, qemu_coroutine_enter(co); } - if (!acb->has_completed) { + while (acb->count < 
s->num_children) { qemu_coroutine_yield(); } + quorum_has_too_much_io_failed(acb); + ret = acb->vote_ret; quorum_aio_finalize(acb); From dee66e2882e5b1fdf6184da7555e891b9057c944 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 10 Nov 2016 16:50:16 +0100 Subject: [PATCH 07/14] quorum: Avoid bdrv_aio_writev() for rewrites Replacing it with bdrv_co_pwritev() prepares us for byte granularity requests and gets us rid of the last bdrv_aio_*() user in quorum. Signed-off-by: Kevin Wolf Reviewed-by: Alberto Garcia Reviewed-by: Eric Blake --- block/quorum.c | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index 2c280bbef7..690fd36740 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -221,15 +221,6 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb) return false; } -static void quorum_rewrite_aio_cb(void *opaque, int ret) -{ - QuorumAIOCB *acb = opaque; - - /* one less rewrite to do */ - acb->rewrite_count--; - qemu_coroutine_enter_if_inactive(acb->co); -} - static int read_fifo_child(QuorumAIOCB *acb); static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) @@ -296,7 +287,27 @@ static void quorum_report_bad_versions(BDRVQuorumState *s, } } -static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb, +static void quorum_rewrite_entry(void *opaque) +{ + QuorumCo *co = opaque; + QuorumAIOCB *acb = co->acb; + BDRVQuorumState *s = acb->bs->opaque; + + /* Ignore any errors, it's just a correction attempt for already + * corrupted data. 
*/ + bdrv_co_pwritev(s->children[co->idx], + acb->sector_num * BDRV_SECTOR_SIZE, + acb->nb_sectors * BDRV_SECTOR_SIZE, + acb->qiov, 0); + + /* Wake up the caller after the last rewrite */ + acb->rewrite_count--; + if (!acb->rewrite_count) { + qemu_coroutine_enter_if_inactive(acb->co); + } +} + +static bool quorum_rewrite_bad_versions(QuorumAIOCB *acb, QuorumVoteValue *value) { QuorumVoteVersion *version; @@ -315,7 +326,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb, } } - /* quorum_rewrite_aio_cb will count down this to zero */ + /* quorum_rewrite_entry will count down this to zero */ acb->rewrite_count = count; /* now fire the correcting rewrites */ @@ -324,9 +335,14 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb, continue; } QLIST_FOREACH(item, &version->items, next) { - bdrv_aio_writev(s->children[item->index], acb->sector_num, - acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb, - acb); + Coroutine *co; + QuorumCo data = { + .acb = acb, + .idx = item->index, + }; + + co = qemu_coroutine_create(quorum_rewrite_entry, &data); + qemu_coroutine_enter(co); } } @@ -579,7 +595,7 @@ static void quorum_vote(QuorumAIOCB *acb) /* corruption correction is enabled */ if (s->rewrite_corrupted) { - quorum_rewrite_bad_versions(s, acb, &winner->value); + quorum_rewrite_bad_versions(acb, &winner->value); } free_exit: From 6847da380805c46e21162dac3e4b4123399769d0 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 10 Nov 2016 17:22:07 +0100 Subject: [PATCH 08/14] quorum: Implement .bdrv_co_preadv/pwritev() This enables byte granularity requests on quorum nodes. Note that the QMP events emitted by the driver are an external API that we were careless enough to define as sector based. The offset and length of requests reported in events are rounded therefore. 
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia --- block/quorum.c | 81 +++++++++++++++++++++++--------------------------- 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index 690fd36740..4bba9fd78c 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -114,8 +114,8 @@ struct QuorumAIOCB { Coroutine *co; /* Request metadata */ - uint64_t sector_num; - int nb_sectors; + uint64_t offset; + uint64_t bytes; QEMUIOVector *qiov; /* calling IOV */ @@ -157,8 +157,8 @@ static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b) static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, QEMUIOVector *qiov, - uint64_t sector_num, - int nb_sectors) + uint64_t offset, + uint64_t bytes) { BDRVQuorumState *s = bs->opaque; QuorumAIOCB *acb = g_new(QuorumAIOCB, 1); @@ -166,8 +166,8 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, acb->co = qemu_coroutine_self(); acb->bs = bs; - acb->sector_num = sector_num; - acb->nb_sectors = nb_sectors; + acb->offset = offset; + acb->bytes = bytes; acb->qiov = qiov; acb->qcrs = g_new0(QuorumChildRequest, s->num_children); acb->count = 0; @@ -187,23 +187,30 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, return acb; } -static void quorum_report_bad(QuorumOpType type, uint64_t sector_num, - int nb_sectors, char *node_name, int ret) +static void quorum_report_bad(QuorumOpType type, uint64_t offset, + uint64_t bytes, char *node_name, int ret) { const char *msg = NULL; + int64_t start_sector = offset / BDRV_SECTOR_SIZE; + int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE); + if (ret < 0) { msg = strerror(-ret); } - qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, - sector_num, nb_sectors, &error_abort); + qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, start_sector, + end_sector - start_sector, &error_abort); } static void quorum_report_failure(QuorumAIOCB *acb) { const char *reference = 
bdrv_get_device_or_node_name(acb->bs); - qapi_event_send_quorum_failure(reference, acb->sector_num, - acb->nb_sectors, &error_abort); + int64_t start_sector = acb->offset / BDRV_SECTOR_SIZE; + int64_t end_sector = DIV_ROUND_UP(acb->offset + acb->bytes, + BDRV_SECTOR_SIZE); + + qapi_event_send_quorum_failure(reference, start_sector, + end_sector - start_sector, &error_abort); } static int quorum_vote_error(QuorumAIOCB *acb); @@ -240,8 +247,7 @@ static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret) { QuorumAIOCB *acb = sacb->parent; QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; - quorum_report_bad(type, acb->sector_num, acb->nb_sectors, - sacb->bs->node_name, ret); + quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret); } static int quorum_fifo_aio_cb(void *opaque, int ret) @@ -280,8 +286,7 @@ static void quorum_report_bad_versions(BDRVQuorumState *s, continue; } QLIST_FOREACH(item, &version->items, next) { - quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num, - acb->nb_sectors, + quorum_report_bad(QUORUM_OP_TYPE_READ, acb->offset, acb->bytes, s->children[item->index]->bs->node_name, 0); } } @@ -295,9 +300,7 @@ static void quorum_rewrite_entry(void *opaque) /* Ignore any errors, it's just a correction attempt for already * corrupted data. 
*/ - bdrv_co_pwritev(s->children[co->idx], - acb->sector_num * BDRV_SECTOR_SIZE, - acb->nb_sectors * BDRV_SECTOR_SIZE, + bdrv_co_pwritev(s->children[co->idx], acb->offset, acb->bytes, acb->qiov, 0); /* Wake up the caller after the last rewrite */ @@ -462,8 +465,8 @@ static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb, va_list ap; va_start(ap, fmt); - fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ", - acb->sector_num, acb->nb_sectors); + fprintf(stderr, "quorum: offset=%" PRIu64 " bytes=%" PRIu64 " ", + acb->offset, acb->bytes); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); va_end(ap); @@ -481,9 +484,8 @@ static bool quorum_compare(QuorumAIOCB *acb, if (s->is_blkverify) { offset = qemu_iovec_compare(a, b); if (offset != -1) { - quorum_err(acb, "contents mismatch in sector %" PRId64, - acb->sector_num + - (uint64_t)(offset / BDRV_SECTOR_SIZE)); + quorum_err(acb, "contents mismatch at offset %" PRIu64, + acb->offset + offset); } return true; } @@ -612,9 +614,7 @@ static void read_quorum_children_entry(void *opaque) QuorumChildRequest *sacb = &acb->qcrs[i]; sacb->bs = s->children[i]->bs; - sacb->ret = bdrv_co_preadv(s->children[i], - acb->sector_num * BDRV_SECTOR_SIZE, - acb->nb_sectors * BDRV_SECTOR_SIZE, + sacb->ret = bdrv_co_preadv(s->children[i], acb->offset, acb->bytes, &acb->qcrs[i].qiov, 0); if (sacb->ret == 0) { @@ -683,19 +683,17 @@ static int read_fifo_child(QuorumAIOCB *acb) int ret; acb->qcrs[n].bs = s->children[n]->bs; - ret = bdrv_co_preadv(s->children[n], acb->sector_num * BDRV_SECTOR_SIZE, - acb->nb_sectors * BDRV_SECTOR_SIZE, acb->qiov, 0); + ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes, acb->qiov, 0); ret = quorum_fifo_aio_cb(&acb->qcrs[n], ret); return ret; } -static int quorum_co_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *qiov) +static int quorum_co_preadv(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int flags) { BDRVQuorumState *s = 
bs->opaque; - QuorumAIOCB *acb = quorum_aio_get(bs, qiov, sector_num, nb_sectors); + QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes); int ret; acb->is_read = true; @@ -720,9 +718,7 @@ static void write_quorum_entry(void *opaque) QuorumChildRequest *sacb = &acb->qcrs[i]; sacb->bs = s->children[i]->bs; - sacb->ret = bdrv_co_pwritev(s->children[i], - acb->sector_num * BDRV_SECTOR_SIZE, - acb->nb_sectors * BDRV_SECTOR_SIZE, + sacb->ret = bdrv_co_pwritev(s->children[i], acb->offset, acb->bytes, acb->qiov, 0); if (sacb->ret == 0) { acb->success_count++; @@ -739,12 +735,11 @@ static void write_quorum_entry(void *opaque) } } -static int quorum_co_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *qiov) +static int quorum_co_pwritev(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov, int flags) { BDRVQuorumState *s = bs->opaque; - QuorumAIOCB *acb = quorum_aio_get(bs, qiov, sector_num, nb_sectors); + QuorumAIOCB *acb = quorum_aio_get(bs, qiov, offset, bytes); int i, ret; for (i = 0; i < s->num_children; i++) { @@ -811,7 +806,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs) result = bdrv_co_flush(s->children[i]->bs); if (result) { quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0, - bdrv_nb_sectors(s->children[i]->bs), + bdrv_getlength(s->children[i]->bs), s->children[i]->bs->node_name, result); result_value.l = result; quorum_count_vote(&error_votes, &result_value, i); @@ -1144,8 +1139,8 @@ static BlockDriver bdrv_quorum = { .bdrv_getlength = quorum_getlength, - .bdrv_co_readv = quorum_co_readv, - .bdrv_co_writev = quorum_co_writev, + .bdrv_co_preadv = quorum_co_preadv, + .bdrv_co_pwritev = quorum_co_pwritev, .bdrv_add_child = quorum_add_child, .bdrv_del_child = quorum_del_child, From a7e159025ed439011a47d56af5f34729bbb7247c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 10 Nov 2016 17:40:34 +0100 Subject: [PATCH 09/14] quorum: Inline quorum_fifo_aio_cb() Inlining the function removes some 
boilerplate code and replaces recursion by a simple loop, so the code becomes somewhat easier to understand. Signed-off-by: Kevin Wolf Reviewed-by: Alberto Garcia Reviewed-by: Eric Blake --- block/quorum.c | 42 +++++++++++++----------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index 4bba9fd78c..e2443897b5 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -250,30 +250,6 @@ static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret) quorum_report_bad(type, acb->offset, acb->bytes, sacb->bs->node_name, ret); } -static int quorum_fifo_aio_cb(void *opaque, int ret) -{ - QuorumChildRequest *sacb = opaque; - QuorumAIOCB *acb = sacb->parent; - BDRVQuorumState *s = acb->bs->opaque; - - assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO); - - if (ret < 0) { - quorum_report_bad_acb(sacb, ret); - - /* We try to read next child in FIFO order if we fail to read */ - if (acb->children_read < s->num_children) { - return read_fifo_child(acb); - } - } - - acb->vote_ret = ret; - - /* FIXME: rewrite failed children if acb->children_read > 1? 
*/ - - return ret; -} - static void quorum_report_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb, QuorumVoteValue *value) @@ -679,12 +655,20 @@ static int read_quorum_children(QuorumAIOCB *acb) static int read_fifo_child(QuorumAIOCB *acb) { BDRVQuorumState *s = acb->bs->opaque; - int n = acb->children_read++; - int ret; + int n, ret; - acb->qcrs[n].bs = s->children[n]->bs; - ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes, acb->qiov, 0); - ret = quorum_fifo_aio_cb(&acb->qcrs[n], ret); + /* We try to read the next child in FIFO order if we failed to read */ + do { + n = acb->children_read++; + acb->qcrs[n].bs = s->children[n]->bs; + ret = bdrv_co_preadv(s->children[n], acb->offset, acb->bytes, + acb->qiov, 0); + if (ret < 0) { + quorum_report_bad_acb(&acb->qcrs[n], ret); + } + } while (ret < 0 && acb->children_read < s->num_children); + + /* FIXME: rewrite failed children if acb->children_read > 1? */ return ret; } From 7c37f941d028e2a1bf350ffb292684f2e1a292c7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 22 Nov 2016 12:49:49 +0100 Subject: [PATCH 10/14] quorum: Clean up quorum_aio_get() Make sure that all fields of the new QuorumAIOCB are zeroed when the function returns even without explicitly setting them. This will protect us when new fields are added, removes some explicit zero assignment and makes the code a little nicer to read. 
Suggested-by: Eric Blake Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Alberto Garcia --- block/quorum.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/block/quorum.c b/block/quorum.c index e2443897b5..86e2072dce 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -164,20 +164,17 @@ static QuorumAIOCB *quorum_aio_get(BlockDriverState *bs, QuorumAIOCB *acb = g_new(QuorumAIOCB, 1); int i; - acb->co = qemu_coroutine_self(); - acb->bs = bs; - acb->offset = offset; - acb->bytes = bytes; - acb->qiov = qiov; - acb->qcrs = g_new0(QuorumChildRequest, s->num_children); - acb->count = 0; - acb->success_count = 0; - acb->rewrite_count = 0; - acb->votes.compare = quorum_sha256_compare; - QLIST_INIT(&acb->votes.vote_list); - acb->is_read = false; - acb->vote_ret = 0; + *acb = (QuorumAIOCB) { + .co = qemu_coroutine_self(), + .bs = bs, + .offset = offset, + .bytes = bytes, + .qiov = qiov, + .votes.compare = quorum_sha256_compare, + .votes.vote_list = QLIST_HEAD_INITIALIZER(acb.votes.vote_list), + }; + acb->qcrs = g_new0(QuorumChildRequest, s->num_children); for (i = 0; i < s->num_children; i++) { acb->qcrs[i].buf = NULL; acb->qcrs[i].ret = 0; From 7c3a998531da0a77baaa2a6ccb48b8a7b5eb14d5 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 4 Nov 2016 21:13:45 +0100 Subject: [PATCH 11/14] blkdebug: Implement bdrv_co_preadv/pwritev/flush This enables byte granularity requests for blkdebug, and at the same time gets us rid of another user of the BDS-level AIO emulation. Note that unless align=512 is specified, this can behave subtly different from the old behaviour because bdrv_co_preadv/pwritev don't have to perform alignment adjustments any more. 
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/blkdebug.c | 86 ++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 46 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index 4127571454..acccf85666 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -58,10 +58,6 @@ typedef struct BlkdebugSuspendedReq { QLIST_ENTRY(BlkdebugSuspendedReq) next; } BlkdebugSuspendedReq; -static const AIOCBInfo blkdebug_aiocb_info = { - .aiocb_size = sizeof(BlkdebugAIOCB), -}; - enum { ACTION_INJECT_ERROR, ACTION_SET_STATE, @@ -77,7 +73,7 @@ typedef struct BlkdebugRule { int error; int immediately; int once; - int64_t sector; + int64_t offset; } inject; struct { int new_state; @@ -174,6 +170,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp) const char* event_name; BlkdebugEvent event; struct BlkdebugRule *rule; + int64_t sector; /* Find the right event for the rule */ event_name = qemu_opt_get(opts, "event"); @@ -200,7 +197,9 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp) rule->options.inject.once = qemu_opt_get_bool(opts, "once", 0); rule->options.inject.immediately = qemu_opt_get_bool(opts, "immediately", 0); - rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1); + sector = qemu_opt_get_number(opts, "sector", -1); + rule->options.inject.offset = + sector == -1 ? 
-1 : sector * BDRV_SECTOR_SIZE; break; case ACTION_SET_STATE: @@ -408,17 +407,14 @@ out: static void error_callback_bh(void *opaque) { - struct BlkdebugAIOCB *acb = opaque; - acb->common.cb(acb->common.opaque, acb->ret); - qemu_aio_unref(acb); + Coroutine *co = opaque; + qemu_coroutine_enter(co); } -static BlockAIOCB *inject_error(BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule) +static int inject_error(BlockDriverState *bs, BlkdebugRule *rule) { BDRVBlkdebugState *s = bs->opaque; int error = rule->options.inject.error; - struct BlkdebugAIOCB *acb; bool immediately = rule->options.inject.immediately; if (rule->options.inject.once) { @@ -426,81 +422,79 @@ static BlockAIOCB *inject_error(BlockDriverState *bs, remove_rule(rule); } - if (immediately) { - return NULL; + if (!immediately) { + aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, + qemu_coroutine_self()); + qemu_coroutine_yield(); } - acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque); - acb->ret = -error; - - aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb); - - return &acb->common; + return -error; } -static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) +static int coroutine_fn +blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { BDRVBlkdebugState *s = bs->opaque; BlkdebugRule *rule = NULL; QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { - if (rule->options.inject.sector == -1 || - (rule->options.inject.sector >= sector_num && - rule->options.inject.sector < sector_num + nb_sectors)) { + uint64_t inject_offset = rule->options.inject.offset; + + if (inject_offset == -1 || + (inject_offset >= offset && inject_offset < offset + bytes)) + { break; } } if (rule && rule->options.inject.error) { - return inject_error(bs, cb, opaque, rule); + return 
inject_error(bs, rule); } - return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, - cb, opaque); + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } -static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) +static int coroutine_fn +blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { BDRVBlkdebugState *s = bs->opaque; BlkdebugRule *rule = NULL; QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { - if (rule->options.inject.sector == -1 || - (rule->options.inject.sector >= sector_num && - rule->options.inject.sector < sector_num + nb_sectors)) { + uint64_t inject_offset = rule->options.inject.offset; + + if (inject_offset == -1 || + (inject_offset >= offset && inject_offset < offset + bytes)) + { break; } } if (rule && rule->options.inject.error) { - return inject_error(bs, cb, opaque, rule); + return inject_error(bs, rule); } - return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, - cb, opaque); + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); } -static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque) +static int blkdebug_co_flush(BlockDriverState *bs) { BDRVBlkdebugState *s = bs->opaque; BlkdebugRule *rule = NULL; QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) { - if (rule->options.inject.sector == -1) { + if (rule->options.inject.offset == -1) { break; } } if (rule && rule->options.inject.error) { - return inject_error(bs, cb, opaque, rule); + return inject_error(bs, rule); } - return bdrv_aio_flush(bs->file->bs, cb, opaque); + return bdrv_co_flush(bs->file->bs); } @@ -752,9 +746,9 @@ static BlockDriver bdrv_blkdebug = { .bdrv_refresh_filename = blkdebug_refresh_filename, .bdrv_refresh_limits = blkdebug_refresh_limits, - .bdrv_aio_readv = blkdebug_aio_readv, - .bdrv_aio_writev = blkdebug_aio_writev, - 
.bdrv_aio_flush = blkdebug_aio_flush, + .bdrv_co_preadv = blkdebug_co_preadv, + .bdrv_co_pwritev = blkdebug_co_pwritev, + .bdrv_co_flush_to_disk = blkdebug_co_flush, .bdrv_debug_event = blkdebug_debug_event, .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, From 44b6789299a8acca3f25331bc411055cafc7bb06 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 4 Nov 2016 21:13:45 +0100 Subject: [PATCH 12/14] blkverify: Implement bdrv_co_preadv/pwritev/flush This enables byte granularity requests for blkverify, and at the same time gets us rid of another user of the BDS-level AIO emulation. The reference output of a test case must be changed because the verification failure message reports byte offsets instead of sectors now. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/blkverify.c | 205 ++++++++++++++++++------------------- tests/qemu-iotests/071.out | 8 +- 2 files changed, 102 insertions(+), 111 deletions(-) diff --git a/block/blkverify.c b/block/blkverify.c index 28f9af6dba..43a940c2f5 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -19,38 +19,36 @@ typedef struct { BdrvChild *test_file; } BDRVBlkverifyState; -typedef struct BlkverifyAIOCB BlkverifyAIOCB; -struct BlkverifyAIOCB { - BlockAIOCB common; +typedef struct BlkverifyRequest { + Coroutine *co; + BlockDriverState *bs; /* Request metadata */ bool is_write; - int64_t sector_num; - int nb_sectors; + uint64_t offset; + uint64_t bytes; + int flags; + + int (*request_fn)(BdrvChild *, int64_t, unsigned int, QEMUIOVector *, + BdrvRequestFlags); + + int ret; /* test image result */ + int raw_ret; /* raw image result */ - int ret; /* first completed request's result */ unsigned int done; /* completion counter */ QEMUIOVector *qiov; /* user I/O vector */ - QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */ - void *buf; /* buffer for raw file I/O */ + QEMUIOVector *raw_qiov; /* cloned I/O vector for raw file */ +} BlkverifyRequest; - void (*verify)(BlkverifyAIOCB *acb); -}; - -static 
const AIOCBInfo blkverify_aiocb_info = { - .aiocb_size = sizeof(BlkverifyAIOCB), -}; - -static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb, +static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyRequest *r, const char *fmt, ...) { va_list ap; va_start(ap, fmt); - fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ", - acb->is_write ? "write" : "read", acb->sector_num, - acb->nb_sectors); + fprintf(stderr, "blkverify: %s offset=%" PRId64 " bytes=%" PRId64 " ", + r->is_write ? "write" : "read", r->offset, r->bytes); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); va_end(ap); @@ -166,113 +164,106 @@ static int64_t blkverify_getlength(BlockDriverState *bs) return bdrv_getlength(s->test_file->bs); } -static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write, - int64_t sector_num, QEMUIOVector *qiov, - int nb_sectors, - BlockCompletionFunc *cb, - void *opaque) +static void coroutine_fn blkverify_do_test_req(void *opaque) { - BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque); + BlkverifyRequest *r = opaque; + BDRVBlkverifyState *s = r->bs->opaque; - acb->is_write = is_write; - acb->sector_num = sector_num; - acb->nb_sectors = nb_sectors; - acb->ret = -EINPROGRESS; - acb->done = 0; - acb->qiov = qiov; - acb->buf = NULL; - acb->verify = NULL; - return acb; + r->ret = r->request_fn(s->test_file, r->offset, r->bytes, r->qiov, + r->flags); + r->done++; + qemu_coroutine_enter_if_inactive(r->co); } -static void blkverify_aio_bh(void *opaque) +static void coroutine_fn blkverify_do_raw_req(void *opaque) { - BlkverifyAIOCB *acb = opaque; + BlkverifyRequest *r = opaque; - if (acb->buf) { - qemu_iovec_destroy(&acb->raw_qiov); - qemu_vfree(acb->buf); + r->raw_ret = r->request_fn(r->bs->file, r->offset, r->bytes, r->raw_qiov, + r->flags); + r->done++; + qemu_coroutine_enter_if_inactive(r->co); +} + +static int coroutine_fn +blkverify_co_prwv(BlockDriverState *bs, BlkverifyRequest *r, uint64_t offset, + uint64_t 
bytes, QEMUIOVector *qiov, QEMUIOVector *raw_qiov, + int flags, bool is_write) +{ + Coroutine *co_a, *co_b; + + *r = (BlkverifyRequest) { + .co = qemu_coroutine_self(), + .bs = bs, + .offset = offset, + .bytes = bytes, + .qiov = qiov, + .raw_qiov = raw_qiov, + .flags = flags, + .is_write = is_write, + .request_fn = is_write ? bdrv_co_pwritev : bdrv_co_preadv, + }; + + co_a = qemu_coroutine_create(blkverify_do_test_req, r); + co_b = qemu_coroutine_create(blkverify_do_raw_req, r); + + qemu_coroutine_enter(co_a); + qemu_coroutine_enter(co_b); + + while (r->done < 2) { + qemu_coroutine_yield(); } - acb->common.cb(acb->common.opaque, acb->ret); - qemu_aio_unref(acb); -} -static void blkverify_aio_cb(void *opaque, int ret) -{ - BlkverifyAIOCB *acb = opaque; - - switch (++acb->done) { - case 1: - acb->ret = ret; - break; - - case 2: - if (acb->ret != ret) { - blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret); - } - - if (acb->verify) { - acb->verify(acb); - } - - aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), - blkverify_aio_bh, acb); - break; + if (r->ret != r->raw_ret) { + blkverify_err(r, "return value mismatch %d != %d", r->ret, r->raw_ret); } + + return r->ret; } -static void blkverify_verify_readv(BlkverifyAIOCB *acb) +static int coroutine_fn +blkverify_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { - ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov); - if (offset != -1) { - blkverify_err(acb, "contents mismatch in sector %" PRId64, - acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE)); + BlkverifyRequest r; + QEMUIOVector raw_qiov; + void *buf; + ssize_t cmp_offset; + int ret; + + buf = qemu_blockalign(bs->file->bs, qiov->size); + qemu_iovec_init(&raw_qiov, qiov->niov); + qemu_iovec_clone(&raw_qiov, qiov, buf); + + ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags, + false); + + cmp_offset = qemu_iovec_compare(qiov, &raw_qiov); + if (cmp_offset 
!= -1) { + blkverify_err(&r, "contents mismatch at offset %" PRId64, + offset + cmp_offset); } + + qemu_iovec_destroy(&raw_qiov); + qemu_vfree(buf); + + return ret; } -static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) +static int coroutine_fn +blkverify_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) { - BDRVBlkverifyState *s = bs->opaque; - BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov, - nb_sectors, cb, opaque); - - acb->verify = blkverify_verify_readv; - acb->buf = qemu_blockalign(bs->file->bs, qiov->size); - qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov); - qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf); - - bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors, - blkverify_aio_cb, acb); - bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors, - blkverify_aio_cb, acb); - return &acb->common; + BlkverifyRequest r; + return blkverify_co_prwv(bs, &r, offset, bytes, qiov, qiov, flags, true); } -static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - BDRVBlkverifyState *s = bs->opaque; - BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov, - nb_sectors, cb, opaque); - - bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors, - blkverify_aio_cb, acb); - bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, - blkverify_aio_cb, acb); - return &acb->common; -} - -static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs, - BlockCompletionFunc *cb, - void *opaque) +static int blkverify_co_flush(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; /* Only flush test file, the raw file is not important */ - return bdrv_aio_flush(s->test_file->bs, cb, opaque); + return bdrv_co_flush(s->test_file->bs); } static bool 
blkverify_recurse_is_first_non_filter(BlockDriverState *bs, @@ -332,9 +323,9 @@ static BlockDriver bdrv_blkverify = { .bdrv_getlength = blkverify_getlength, .bdrv_refresh_filename = blkverify_refresh_filename, - .bdrv_aio_readv = blkverify_aio_readv, - .bdrv_aio_writev = blkverify_aio_writev, - .bdrv_aio_flush = blkverify_aio_flush, + .bdrv_co_preadv = blkverify_co_preadv, + .bdrv_co_pwritev = blkverify_co_pwritev, + .bdrv_co_flush = blkverify_co_flush, .is_filter = true, .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter, diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out index 8ff423f56b..dd879f1212 100644 --- a/tests/qemu-iotests/071.out +++ b/tests/qemu-iotests/071.out @@ -12,7 +12,7 @@ read 512/512 bytes at offset 229376 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 +blkverify: read offset=0 bytes=512 contents mismatch at offset 0 === Testing blkverify through file blockref === @@ -26,7 +26,7 @@ read 512/512 bytes at offset 229376 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 +blkverify: read offset=0 bytes=512 contents mismatch at offset 0 === Testing blkdebug through filename === @@ -56,7 +56,7 @@ QMP_VERSION {"return": {}} {"return": {}} {"return": {}} -blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 +blkverify: read offset=0 bytes=512 contents mismatch at offset 0 === Testing blkverify on existing raw block device === @@ -66,7 +66,7 @@ QMP_VERSION {"return": {}} {"return": {}} {"return": {}} -blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 +blkverify: read offset=0 bytes=512 contents mismatch at offset 0 === Testing 
blkdebug's set-state through QMP === From 2e6fc7eb1a4af1b127df5f07b8bb28af891946fa Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 2 Dec 2016 13:48:53 -0600 Subject: [PATCH 13/14] block: Rename raw_bsd to raw-format.c Given that we have raw-win32.c and raw-posix.c, my initial guess at raw_bsd.c was that it was for dealing with raw files using code specific to the BSD operating system (beyond what raw-posix could do). Not so - this name was chosen back in commit e1c66c6 to distinguish that it was a BSD licensed file, in contrast to the then-existing raw.c with an unclear and potentially unusable license. But since it has been more than three years since the rewrite, it's time to pick a more useful name for this file to avoid this type of confusion to future contributors that don't know the backstory, as none of our other files are named solely by the license they use. In reality, this file deals with the raw format, which is useful with any number of protocols, while raw-{win32,posix} deal with the file protocol (and in turn, that protocol is not limited to use with the raw format). So rename raw_bsd to raw-format.c. We could have also used the shorter name raw.c, except that collides with the earlier use of that filename for a different license, and it's better to be safe than risk license pollution. The next patch will also rename raw-win32.c and raw-posix.c to further distinguish the difference in roles. It doesn't hurt that this gets rid of an underscore in the filename, thereby making tab-completion on 'ra' easier (now I don't have to type the shift key, which slows things down :) Suggested-by: Daniel P. 
Berrange Signed-off-by: Eric Blake Reviewed-by: Laszlo Ersek Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- MAINTAINERS | 2 +- block/Makefile.objs | 2 +- block/{raw_bsd.c => raw-format.c} | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename block/{raw_bsd.c => raw-format.c} (99%) diff --git a/MAINTAINERS b/MAINTAINERS index 585cd5abd7..044a32488d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1722,7 +1722,7 @@ F: block/linux-aio.c F: include/block/raw-aio.h F: block/raw-posix.c F: block/raw-win32.c -F: block/raw_bsd.c +F: block/raw-format.c F: block/win32-aio.c qcow2 diff --git a/block/Makefile.objs b/block/Makefile.objs index 67a036a1df..bde742f519 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -1,4 +1,4 @@ -block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o +block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o diff --git a/block/raw_bsd.c b/block/raw-format.c similarity index 99% rename from block/raw_bsd.c rename to block/raw-format.c index 8a5b9b0424..8404a82e0c 100644 --- a/block/raw_bsd.c +++ b/block/raw-format.c @@ -1,4 +1,4 @@ -/* BlockDriver implementation for "raw" +/* BlockDriver implementation for "raw" format driver * * Copyright (C) 2010-2016 Red Hat, Inc. * Copyright (C) 2010, Blue Swirl From c1bb86cd8ae67c14f79422b6e544d1e2bf40eeb2 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Fri, 2 Dec 2016 13:48:54 -0600 Subject: [PATCH 14/14] block: Rename raw-{posix,win32} to file-*.c These files deal with the file protocol, not the raw format (the file protocol is often used with other formats, and the raw format is not forced to use the file protocol). Rename things to make it a bit easier to follow. Suggested-by: Daniel P. 
Berrange Signed-off-by: Eric Blake Reviewed-by: John Snow Reviewed-by: Laszlo Ersek Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- MAINTAINERS | 4 ++-- block/Makefile.objs | 4 ++-- block/{raw-posix.c => file-posix.c} | 0 block/{raw-win32.c => file-win32.c} | 0 block/gluster.c | 4 ++-- block/trace-events | 4 ++-- configure | 2 +- include/block/block_int.h | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) rename block/{raw-posix.c => file-posix.c} (100%) rename block/{raw-win32.c => file-win32.c} (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 044a32488d..78687588f1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1720,9 +1720,9 @@ L: qemu-block@nongnu.org S: Supported F: block/linux-aio.c F: include/block/raw-aio.h -F: block/raw-posix.c -F: block/raw-win32.c F: block/raw-format.c +F: block/file-posix.c +F: block/file-win32.c F: block/win32-aio.c qcow2 diff --git a/block/Makefile.objs b/block/Makefile.objs index bde742f519..0b8fd06f27 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -6,8 +6,8 @@ block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o block-obj-y += quorum.o block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o block-obj-y += block-backend.o snapshot.o qapi.o -block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o -block-obj-$(CONFIG_POSIX) += raw-posix.o +block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o +block-obj-$(CONFIG_POSIX) += file-posix.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o block-obj-y += null.o mirror.o commit.o io.o block-obj-y += throttle-groups.o diff --git a/block/raw-posix.c b/block/file-posix.c similarity index 100% rename from block/raw-posix.c rename to block/file-posix.c diff --git a/block/raw-win32.c b/block/file-win32.c similarity index 100% rename from block/raw-win32.c rename to block/file-win32.c diff --git a/block/gluster.c b/block/gluster.c index a0a74e49fd..1a22f2982d 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -1253,7 +1253,7 @@ static int 
qemu_gluster_has_zero_init(BlockDriverState *bs) * If @start is in a trailing hole or beyond EOF, return -ENXIO. * If we can't find out, return a negative errno other than -ENXIO. * - * (Shamefully copied from raw-posix.c, only miniscule adaptions.) + * (Shamefully copied from file-posix.c, only miniscule adaptions.) */ static int find_allocation(BlockDriverState *bs, off_t start, off_t *data, off_t *hole) @@ -1349,7 +1349,7 @@ exit: * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes * beyond the end of the disk image it will be clamped. * - * (Based on raw_co_get_block_status() from raw-posix.c.) + * (Based on raw_co_get_block_status() from file-posix.c.) */ static int64_t coroutine_fn qemu_gluster_co_get_block_status( BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, diff --git a/block/trace-events b/block/trace-events index cfc05f2478..671a6a851c 100644 --- a/block/trace-events +++ b/block/trace-events @@ -53,8 +53,8 @@ qmp_block_job_resume(void *job) "job %p" qmp_block_job_complete(void *job) "job %p" qmp_block_stream(void *bs, void *job) "bs %p job %p" -# block/raw-win32.c -# block/raw-posix.c +# block/file-win32.c +# block/file-posix.c paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d" paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d" diff --git a/configure b/configure index 218df87d21..86f5214dd0 100755 --- a/configure +++ b/configure @@ -2750,7 +2750,7 @@ if compile_prog "" "" ; then fi ########################################## -# xfsctl() probe, used for raw-posix +# xfsctl() probe, used for file-posix.c if test "$xfs" != "no" ; then cat > $TMPC << EOF #include /* NULL */ diff --git a/include/block/block_int.h b/include/block/block_int.h index 83a423c580..4e4562d444 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -184,7 +184,7 @@ struct BlockDriver { /* * Flushes all data 
that was already written to the OS all the way down to - * the disk (for example raw-posix calls fsync()). + * the disk (for example file-posix.c calls fsync()). */ int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);