diff --git a/block/copy-before-write.c b/block/copy-before-write.c index c83c9c7c46..a8a06fdc09 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -33,12 +33,37 @@ #include "block/block-copy.h" #include "block/copy-before-write.h" +#include "block/reqlist.h" #include "qapi/qapi-visit-block-core.h" typedef struct BDRVCopyBeforeWriteState { BlockCopyState *bcs; BdrvChild *target; + + /* + * @lock: protects access to @access_bitmap, @done_bitmap and + * @frozen_read_reqs + */ + CoMutex lock; + + /* + * @access_bitmap: represents areas allowed for reading by fleecing user. + * Reading from non-dirty areas leads to -EACCES. + */ + BdrvDirtyBitmap *access_bitmap; + + /* + * @done_bitmap: represents areas that was successfully copied to @target by + * copy-before-write operations. + */ + BdrvDirtyBitmap *done_bitmap; + + /* + * @frozen_read_reqs: current read requests for fleecing user in bs->file + * node. These areas must not be rewritten by guest. + */ + BlockReqList frozen_read_reqs; } BDRVCopyBeforeWriteState; static coroutine_fn int cbw_co_preadv( @@ -48,10 +73,20 @@ static coroutine_fn int cbw_co_preadv( return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } +/* + * Do copy-before-write operation. + * + * On failure guest request must be failed too. + * + * On success, we also wait for all in-flight fleecing read requests in source + * node, and it's guaranteed that after cbw_do_copy_before_write() successful + * return there are no such requests and they will never appear. + */ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, uint64_t offset, uint64_t bytes, BdrvRequestFlags flags) { BDRVCopyBeforeWriteState *s = bs->opaque; + int ret; uint64_t off, end; int64_t cluster_size = block_copy_cluster_size(s->bcs); @@ -62,7 +97,17 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs, off = QEMU_ALIGN_DOWN(offset, cluster_size); end = QEMU_ALIGN_UP(offset + bytes, cluster_size); - return block_copy(s->bcs, off, end - off, true); + ret = block_copy(s->bcs, off, end - off, true); + if (ret < 0) { + return ret; + } + + WITH_QEMU_LOCK_GUARD(&s->lock) { + bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off); + reqlist_wait_all(&s->frozen_read_reqs, off, end - off, &s->lock); + } + + return 0; } static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs, @@ -110,6 +155,142 @@ static int coroutine_fn cbw_co_flush(BlockDriverState *bs) return bdrv_co_flush(bs->file->bs); } +/* + * If @offset not accessible - return NULL. + * + * Otherwise, set @pnum to some bytes that accessible from @file (@file is set + * to bs->file or to s->target). Return newly allocated BlockReq object that + * should be than passed to cbw_snapshot_read_unlock(). + * + * It's guaranteed that guest writes will not interact in the region until + * cbw_snapshot_read_unlock() called. + */ +static BlockReq *cbw_snapshot_read_lock(BlockDriverState *bs, + int64_t offset, int64_t bytes, + int64_t *pnum, BdrvChild **file) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + BlockReq *req = g_new(BlockReq, 1); + bool done; + + QEMU_LOCK_GUARD(&s->lock); + + if (bdrv_dirty_bitmap_next_zero(s->access_bitmap, offset, bytes) != -1) { + g_free(req); + return NULL; + } + + done = bdrv_dirty_bitmap_status(s->done_bitmap, offset, bytes, pnum); + if (done) { + /* + * Special invalid BlockReq, that is handled in + * cbw_snapshot_read_unlock(). We don't need to lock something to read + * from s->target. + */ + *req = (BlockReq) {.offset = -1, .bytes = -1}; + *file = s->target; + } else { + reqlist_init_req(&s->frozen_read_reqs, req, offset, bytes); + *file = bs->file; + } + + return req; +} + +static void cbw_snapshot_read_unlock(BlockDriverState *bs, BlockReq *req) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + + if (req->offset == -1 && req->bytes == -1) { + g_free(req); + return; + } + + QEMU_LOCK_GUARD(&s->lock); + + reqlist_remove_req(req); + g_free(req); +} + +static coroutine_fn int +cbw_co_preadv_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, size_t qiov_offset) +{ + BlockReq *req; + BdrvChild *file; + int ret; + + /* TODO: upgrade to async loop using AioTask */ + while (bytes) { + int64_t cur_bytes; + + req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &file); + if (!req) { + return -EACCES; + } + + ret = bdrv_co_preadv_part(file, offset, cur_bytes, + qiov, qiov_offset, 0); + cbw_snapshot_read_unlock(bs, req); + if (ret < 0) { + return ret; + } + + bytes -= cur_bytes; + offset += cur_bytes; + qiov_offset += cur_bytes; + } + + return 0; +} + +static int coroutine_fn +cbw_co_snapshot_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, int64_t bytes, + int64_t *pnum, int64_t *map, + BlockDriverState **file) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + BlockReq *req; + int ret; + int64_t cur_bytes; + BdrvChild *child; + + req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &child); + if (!req) { + return -EACCES; + } + + ret = bdrv_block_status(child->bs, offset, cur_bytes, pnum, map, file); + if (child == s->target) { + /* + * We refer to s->target only for areas that we've written to it. + * And we can not report unallocated blocks in s->target: this will + * break generic block-status-above logic, that will go to + * copy-before-write filtered child in this case. + */ + assert(ret & BDRV_BLOCK_ALLOCATED); + } + + cbw_snapshot_read_unlock(bs, req); + + return ret; +} + +static int coroutine_fn cbw_co_pdiscard_snapshot(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + BDRVCopyBeforeWriteState *s = bs->opaque; + + WITH_QEMU_LOCK_GUARD(&s->lock) { + bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes); + } + + block_copy_reset(s->bcs, offset, bytes); + + return bdrv_co_pdiscard(s->target, offset, bytes); +} + static void cbw_refresh_filename(BlockDriverState *bs) { pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), @@ -194,6 +375,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, { BDRVCopyBeforeWriteState *s = bs->opaque; BdrvDirtyBitmap *bitmap = NULL; + int64_t cluster_size; bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, @@ -225,6 +407,27 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } + cluster_size = block_copy_cluster_size(s->bcs); + + s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); + if (!s->done_bitmap) { + return -EINVAL; + } + bdrv_disable_dirty_bitmap(s->done_bitmap); + + /* s->access_bitmap starts equal to bcs bitmap */ + s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); + if (!s->access_bitmap) { + return -EINVAL; + } + bdrv_disable_dirty_bitmap(s->access_bitmap); + bdrv_dirty_bitmap_merge_internal(s->access_bitmap, + block_copy_dirty_bitmap(s->bcs), NULL, + true); + + qemu_co_mutex_init(&s->lock); + QLIST_INIT(&s->frozen_read_reqs); + return 0; } @@ -232,6 +435,9 @@ static void cbw_close(BlockDriverState *bs) { BDRVCopyBeforeWriteState *s = bs->opaque; + bdrv_release_dirty_bitmap(s->access_bitmap); + bdrv_release_dirty_bitmap(s->done_bitmap); + block_copy_state_free(s->bcs); s->bcs = NULL; } @@ -249,6 +455,10 @@ BlockDriver bdrv_cbw_filter = { .bdrv_co_pdiscard = cbw_co_pdiscard, .bdrv_co_flush = cbw_co_flush, + .bdrv_co_preadv_snapshot = cbw_co_preadv_snapshot, + .bdrv_co_pdiscard_snapshot = cbw_co_pdiscard_snapshot, + .bdrv_co_snapshot_block_status = cbw_co_snapshot_block_status, + .bdrv_refresh_filename = cbw_refresh_filename, .bdrv_child_perm = cbw_child_perm, diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out index 50cbd8e882..aa76131ca9 100644 --- a/tests/qemu-iotests/257.out +++ b/tests/qemu-iotests/257.out @@ -106,6 +106,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -566,6 +582,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -819,6 +851,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -1279,6 +1327,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -1532,6 +1596,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -1992,6 +2072,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -2245,6 +2341,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -2705,6 +2817,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -2958,6 +3086,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -3418,6 +3562,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -3671,6 +3831,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -4131,6 +4307,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -4384,6 +4576,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false, @@ -4844,6 +5052,22 @@ write -P0x67 0x3fe0000 0x20000 {"return": ""} { "bitmaps": { + "backup-top": [ + { + "busy": false, + "count": 67108864, + "granularity": 65536, + "persistent": false, + "recording": false + }, + { + "busy": false, + "count": 458752, + "granularity": 65536, + "persistent": false, + "recording": false + } + ], "drive0": [ { "busy": false,