0c8022876f
We are generally moving to int64_t for both offset and bytes parameters on all io paths. The main motivation is implementing a 64-bit write_zeroes operation for fast zeroing of large disk chunks, up to the whole disk. We chose a signed type, to be consistent with off_t (which is signed) and with the possibility of a signed return type (where a negative value means error). So, convert the driver discard handlers' bytes parameter to int64_t. The only caller of all updated functions is bdrv_co_pdiscard in block/io.c. It is already prepared to work with 64bit requests, but passes at most max(bs->bl.max_pdiscard, INT_MAX) to the driver. Let's look at all updated functions: blkdebug: all calculations are still OK, thanks to bdrv_check_qiov_request(). Both rule_check and bdrv_co_pdiscard are 64bit. blklogwrites: passes to blk_log_writes_co_log which is 64bit. blkreplay, copy-on-read, filter-compress: pass to bdrv_co_pdiscard, OK. copy-before-write: passes to bdrv_co_pdiscard which is 64bit and to cbw_do_copy_before_write which is 64bit. file-posix: one handler calls raw_account_discard(), which is 64bit, and both handlers call raw_do_pdiscard(). Update raw_do_pdiscard, which passes to RawPosixAIOData::aio_nbytes, which is 64bit (and calls raw_account_discard()). gluster: somehow, the third argument of glfs_discard_async is size_t. Let's set max_pdiscard accordingly. iscsi: iscsi_allocmap_set_invalid is 64bit, !is_byte_request_lun_aligned is 64bit. list.num is uint32_t. Let's clarify max_pdiscard and pdiscard_alignment. mirror_top: passes to bdrv_mirror_top_do_write() which is 64bit. nbd: protocol limitation. max_pdiscard is already set strict enough, keep it as is for now. nvme: buf.nlb is uint32_t and we do shift. So, add corresponding limits to nvme_refresh_limits(). preallocate: passes to bdrv_co_pdiscard() which is 64bit. rbd: passes to qemu_rbd_start_co() which is 64bit. qcow2: calculations are still OK, thanks to bdrv_check_qiov_request(), qcow2_cluster_discard() is 64bit. 
raw-format: raw_adjust_offset() is 64bit, bdrv_co_pdiscard too. throttle: passes to bdrv_co_pdiscard() which is 64bit and to throttle_group_co_io_limits_intercept() which is 64bit as well. test-block-iothread: bytes argument is unused. Great! Now all drivers are prepared to handle 64bit discard requests, or else have explicit max_pdiscard limits. Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Message-Id: <20210903102807.27127-11-vsementsov@virtuozzo.com> Reviewed-by: Eric Blake <eblake@redhat.com> Signed-off-by: Eric Blake <eblake@redhat.com>
258 lines
7.6 KiB
C
258 lines
7.6 KiB
C
/*
|
|
* copy-before-write filter driver
|
|
*
|
|
* The driver performs Copy-Before-Write (CBW) operation: it is injected above
|
|
* some node, and before each write it copies _old_ data to the target node.
|
|
*
|
|
* Copyright (c) 2018-2021 Virtuozzo International GmbH.
|
|
*
|
|
* Author:
|
|
* Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
|
|
#include "sysemu/block-backend.h"
|
|
#include "qemu/cutils.h"
|
|
#include "qapi/error.h"
|
|
#include "block/block_int.h"
|
|
#include "block/qdict.h"
|
|
#include "block/block-copy.h"
|
|
|
|
#include "block/copy-before-write.h"
|
|
|
|
typedef struct BDRVCopyBeforeWriteState {
|
|
BlockCopyState *bcs;
|
|
BdrvChild *target;
|
|
} BDRVCopyBeforeWriteState;
|
|
|
|
static coroutine_fn int cbw_co_preadv(
|
|
BlockDriverState *bs, int64_t offset, int64_t bytes,
|
|
QEMUIOVector *qiov, BdrvRequestFlags flags)
|
|
{
|
|
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
|
|
}
|
|
|
|
/*
 * Run block_copy() over the region [offset, offset + bytes), widened
 * outwards to block-copy cluster boundaries.
 *
 * Requests carrying BDRV_REQ_WRITE_UNCHANGED are skipped and 0 is returned
 * for them. Otherwise the result of block_copy() is returned.
 */
static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
        uint64_t offset, uint64_t bytes, BdrvRequestFlags flags)
{
    BDRVCopyBeforeWriteState *state = bs->opaque;
    int64_t cluster_size = block_copy_cluster_size(state->bcs);
    uint64_t copy_start, copy_end;

    if (!(flags & BDRV_REQ_WRITE_UNCHANGED)) {
        /* Round the start down and the end up to whole clusters */
        copy_start = QEMU_ALIGN_DOWN(offset, cluster_size);
        copy_end = QEMU_ALIGN_UP(offset + bytes, cluster_size);

        return block_copy(state->bcs, copy_start, copy_end - copy_start, true);
    }

    return 0;
}
|
|
|
|
/*
 * Discard handler: run the copy-before-write step for the region first and
 * forward the discard to the filtered child only if that step succeeded.
 *
 * Returns the negative value from the copy step on failure, otherwise the
 * result of bdrv_co_pdiscard().
 */
static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs,
                                        int64_t offset, int64_t bytes)
{
    int ret;

    ret = cbw_do_copy_before_write(bs, offset, bytes, 0);
    if (ret >= 0) {
        ret = bdrv_co_pdiscard(bs->file, offset, bytes);
    }

    return ret;
}
|
|
|
|
/*
 * Write-zeroes handler: run the copy-before-write step for the region
 * (passing @flags along) and forward the request to the filtered child
 * only if that step succeeded.
 *
 * Returns the negative value from the copy step on failure, otherwise the
 * result of bdrv_co_pwrite_zeroes().
 */
static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs,
        int64_t offset, int64_t bytes, BdrvRequestFlags flags)
{
    int ret;

    ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
    if (ret >= 0) {
        ret = bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
    }

    return ret;
}
|
|
|
|
static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs,
|
|
int64_t offset,
|
|
int64_t bytes,
|
|
QEMUIOVector *qiov,
|
|
BdrvRequestFlags flags)
|
|
{
|
|
int ret = cbw_do_copy_before_write(bs, offset, bytes, flags);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
|
|
return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
|
|
}
|
|
|
|
/*
 * Flush handler: flush the node behind the filtered child. When there is
 * no filtered child (bs->file is NULL) nothing is done and 0 is returned.
 */
static int coroutine_fn cbw_co_flush(BlockDriverState *bs)
{
    if (bs->file) {
        return bdrv_co_flush(bs->file->bs);
    }

    return 0;
}
|
|
|
|
/*
 * Copy the filename of the node behind the filtered child into
 * bs->exact_filename, bounded by the size of that buffer.
 */
static void cbw_refresh_filename(BlockDriverState *bs)
{
    BlockDriverState *source = bs->file->bs;

    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), source->filename);
}
|
|
|
|
static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
|
|
BdrvChildRole role,
|
|
BlockReopenQueue *reopen_queue,
|
|
uint64_t perm, uint64_t shared,
|
|
uint64_t *nperm, uint64_t *nshared)
|
|
{
|
|
if (!(role & BDRV_CHILD_FILTERED)) {
|
|
/*
|
|
* Target child
|
|
*
|
|
* Share write to target (child_file), to not interfere
|
|
* with guest writes to its disk which may be in target backing chain.
|
|
* Can't resize during a backup block job because we check the size
|
|
* only upfront.
|
|
*/
|
|
*nshared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
|
|
*nperm = BLK_PERM_WRITE;
|
|
} else {
|
|
/* Source child */
|
|
bdrv_default_perms(bs, c, role, reopen_queue,
|
|
perm, shared, nperm, nshared);
|
|
|
|
if (!QLIST_EMPTY(&bs->parents)) {
|
|
if (perm & BLK_PERM_WRITE) {
|
|
*nperm = *nperm | BLK_PERM_CONSISTENT_READ;
|
|
}
|
|
*nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
|
|
}
|
|
}
|
|
}
|
|
|
|
static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
|
|
Error **errp)
|
|
{
|
|
BDRVCopyBeforeWriteState *s = bs->opaque;
|
|
BdrvDirtyBitmap *copy_bitmap;
|
|
|
|
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
|
|
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
|
|
false, errp);
|
|
if (!bs->file) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds,
|
|
BDRV_CHILD_DATA, false, errp);
|
|
if (!s->target) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
bs->total_sectors = bs->file->bs->total_sectors;
|
|
bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
|
|
(BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
|
|
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
|
|
((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
|
|
bs->file->bs->supported_zero_flags);
|
|
|
|
s->bcs = block_copy_state_new(bs->file, s->target, errp);
|
|
if (!s->bcs) {
|
|
error_prepend(errp, "Cannot create block-copy-state: ");
|
|
return -EINVAL;
|
|
}
|
|
|
|
copy_bitmap = block_copy_dirty_bitmap(s->bcs);
|
|
bdrv_set_dirty_bitmap(copy_bitmap, 0, bdrv_dirty_bitmap_size(copy_bitmap));
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Close handler: release the block-copy state and clear the pointer to it.
 */
static void cbw_close(BlockDriverState *bs)
{
    BDRVCopyBeforeWriteState *state = bs->opaque;

    block_copy_state_free(state->bcs);
    state->bcs = NULL;
}
|
|
|
|
BlockDriver bdrv_cbw_filter = {
|
|
.format_name = "copy-before-write",
|
|
.instance_size = sizeof(BDRVCopyBeforeWriteState),
|
|
|
|
.bdrv_open = cbw_open,
|
|
.bdrv_close = cbw_close,
|
|
|
|
.bdrv_co_preadv = cbw_co_preadv,
|
|
.bdrv_co_pwritev = cbw_co_pwritev,
|
|
.bdrv_co_pwrite_zeroes = cbw_co_pwrite_zeroes,
|
|
.bdrv_co_pdiscard = cbw_co_pdiscard,
|
|
.bdrv_co_flush = cbw_co_flush,
|
|
|
|
.bdrv_refresh_filename = cbw_refresh_filename,
|
|
|
|
.bdrv_child_perm = cbw_child_perm,
|
|
|
|
.is_filter = true,
|
|
};
|
|
|
|
BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
|
|
BlockDriverState *target,
|
|
const char *filter_node_name,
|
|
BlockCopyState **bcs,
|
|
Error **errp)
|
|
{
|
|
ERRP_GUARD();
|
|
BDRVCopyBeforeWriteState *state;
|
|
BlockDriverState *top;
|
|
QDict *opts;
|
|
|
|
assert(source->total_sectors == target->total_sectors);
|
|
|
|
opts = qdict_new();
|
|
qdict_put_str(opts, "driver", "copy-before-write");
|
|
if (filter_node_name) {
|
|
qdict_put_str(opts, "node-name", filter_node_name);
|
|
}
|
|
qdict_put_str(opts, "file", bdrv_get_node_name(source));
|
|
qdict_put_str(opts, "target", bdrv_get_node_name(target));
|
|
|
|
top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
|
|
if (!top) {
|
|
return NULL;
|
|
}
|
|
|
|
state = top->opaque;
|
|
*bcs = state->bcs;
|
|
|
|
return top;
|
|
}
|
|
|
|
/*
 * Remove the filter node @bs and drop a reference to it.
 *
 * A failure to drop the filter is reported through &error_abort.
 */
void bdrv_cbw_drop(BlockDriverState *bs)
{
    /* Take the filter out first ... */
    bdrv_drop_filter(bs, &error_abort);

    /* ... then give up the reference to the node */
    bdrv_unref(bs);
}
|
|
|
|
static void cbw_init(void)
|
|
{
|
|
bdrv_register(&bdrv_cbw_filter);
|
|
}
|
|
|
|
block_init(cbw_init);
|