Block layer patches

- Fix VHDX image corruption bug
 - Fix for performance regression: Remove bdrv_co_get_geometry coroutines
   from I/O hot path
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmQ1dDARHGt3b2xmQHJl
 ZGhhdC5jb20ACgkQfwmycsiPL9ZM8BAArqnJjr2iAVT/yYHZKO8GUyrt3Ndq9aAb
 hlAoMud0Xkof588I1W4AelOBYz/Cm4OEeFNAYxFbWif6t1iSB/J3FG6EQMCRqOnV
 1GHIrJO9tolhjGx9GcjbYjXVJDyIsKDhcNCFJ9gke7+zVZLT8bLA5ibdZ2xYEcAp
 DfH27pBa6dlLd2CnDfkatpUwqqUDju+iXLaB4kGN/AG4Xv61Jk9ZqpRIyl1lToXO
 C9HDbHh3U/7fT2q9lMUXecOQnRFhXhvYSyiU+vcCFJPdijYPacC/HqJo200fG67y
 NDw/xviip3nFQWpxB06qx5A/H3UtmacGRSeckPvN7ZuEG4qFJSgYFsJL2+Rd11gu
 y2it06WWpYz+CFtlbfTkDuKj35F9VGFcmdfwnWxcmpMYDBWLbCJuzMpZJkJj5ahm
 QT6cv138nSvhvMpXLLZXER9opdGqqTU7LS2NqSTDFDKlPOnhofl1+FK0dhjrecEf
 A3bVfY8z8j+R2CYRzFINf2FUJA91XJjbv2kaJkV6Jq3x1usmgsm+QmCEefPpYF2l
 nlx5wFewxlqg8skMKDrKPXpB7d3KiKHy829HRJJtgg9RBoI9yST9kSRQ/o1IXlnP
 xCPG23Trik0dj9W178MDrBwf9ug0EKg2a4Ny3ohLq48sJP9pzjL1bR6j0Zww+tcz
 XMvgFSKspeY=
 =4z1y
 -----END PGP SIGNATURE-----

Merge tag 'for-upstream' of https://repo.or.cz/qemu/kevin into staging

Block layer patches

- Fix VHDX image corruption bug
- Fix for performance regression: Remove bdrv_co_get_geometry coroutines
  from I/O hot path

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmQ1dDARHGt3b2xmQHJl
# ZGhhdC5jb20ACgkQfwmycsiPL9ZM8BAArqnJjr2iAVT/yYHZKO8GUyrt3Ndq9aAb
# hlAoMud0Xkof588I1W4AelOBYz/Cm4OEeFNAYxFbWif6t1iSB/J3FG6EQMCRqOnV
# 1GHIrJO9tolhjGx9GcjbYjXVJDyIsKDhcNCFJ9gke7+zVZLT8bLA5ibdZ2xYEcAp
# DfH27pBa6dlLd2CnDfkatpUwqqUDju+iXLaB4kGN/AG4Xv61Jk9ZqpRIyl1lToXO
# C9HDbHh3U/7fT2q9lMUXecOQnRFhXhvYSyiU+vcCFJPdijYPacC/HqJo200fG67y
# NDw/xviip3nFQWpxB06qx5A/H3UtmacGRSeckPvN7ZuEG4qFJSgYFsJL2+Rd11gu
# y2it06WWpYz+CFtlbfTkDuKj35F9VGFcmdfwnWxcmpMYDBWLbCJuzMpZJkJj5ahm
# QT6cv138nSvhvMpXLLZXER9opdGqqTU7LS2NqSTDFDKlPOnhofl1+FK0dhjrecEf
# A3bVfY8z8j+R2CYRzFINf2FUJA91XJjbv2kaJkV6Jq3x1usmgsm+QmCEefPpYF2l
# nlx5wFewxlqg8skMKDrKPXpB7d3KiKHy829HRJJtgg9RBoI9yST9kSRQ/o1IXlnP
# xCPG23Trik0dj9W178MDrBwf9ug0EKg2a4Ny3ohLq48sJP9pzjL1bR6j0Zww+tcz
# XMvgFSKspeY=
# =4z1y
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 11 Apr 2023 15:52:32 BST
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* tag 'for-upstream' of https://repo.or.cz/qemu/kevin:
  block, block-backend: write some hot coroutine wrappers by hand
  block-backend: ignore inserted state in blk_co_nb_sectors
  block-backend: inline bdrv_co_get_geometry
  migration/block: replace uses of blk_nb_sectors that do not check result
  block: remove has_variable_length from BlockDriver
  block: refresh bs->total_sectors on reopen
  block: remove has_variable_length from filters
  block: move has_variable_length to BlockLimits
  iotests: Regression test for vhdx log corruption
  block/vhdx: fix dynamic VHDX BAT corruption

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2023-04-11 16:19:06 +01:00
commit abb02ce0e7
17 changed files with 162 additions and 43 deletions

35
block.c
View File

@ -4918,6 +4918,7 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
qdict_del(bs->options, "backing");
bdrv_refresh_limits(bs, NULL, NULL);
bdrv_refresh_total_sectors(bs, bs->total_sectors);
}
/*
@ -5849,7 +5850,7 @@ int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs)
if (!drv)
return -ENOMEDIUM;
if (drv->has_variable_length) {
if (bs->bl.has_variable_length) {
int ret = bdrv_co_refresh_total_sectors(bs, bs->total_sectors);
if (ret < 0) {
return ret;
@ -5858,6 +5859,28 @@ int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs)
return bs->total_sectors;
}
/*
* This wrapper is written by hand because this function is in the hot I/O path,
* via blk_get_geometry.
*/
int64_t coroutine_mixed_fn bdrv_nb_sectors(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
IO_CODE();
if (!drv)
return -ENOMEDIUM;
if (bs->bl.has_variable_length) {
int ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
if (ret < 0) {
return ret;
}
}
return bs->total_sectors;
}
/**
* Return length in bytes on success, -errno on error.
* The length is always a multiple of BDRV_SECTOR_SIZE.
@ -5878,16 +5901,6 @@ int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs)
return ret * BDRV_SECTOR_SIZE;
}
/* return 0 as number of sectors if no device present or error */
void coroutine_fn bdrv_co_get_geometry(BlockDriverState *bs,
uint64_t *nb_sectors_ptr)
{
int64_t nb_sectors = bdrv_co_nb_sectors(bs);
IO_CODE();
*nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
}
bool bdrv_is_sg(BlockDriverState *bs)
{
IO_CODE();

View File

@ -1615,29 +1615,53 @@ int64_t coroutine_fn blk_co_getlength(BlockBackend *blk)
return bdrv_co_getlength(blk_bs(blk));
}
void coroutine_fn blk_co_get_geometry(BlockBackend *blk,
uint64_t *nb_sectors_ptr)
int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
IO_CODE();
GRAPH_RDLOCK_GUARD();
if (!blk_bs(blk)) {
*nb_sectors_ptr = 0;
if (!bs) {
return -ENOMEDIUM;
} else {
bdrv_co_get_geometry(blk_bs(blk), nb_sectors_ptr);
return bdrv_co_nb_sectors(bs);
}
}
int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk)
/*
* This wrapper is written by hand because this function is in the hot I/O path,
* via blk_get_geometry.
*/
int64_t coroutine_mixed_fn blk_nb_sectors(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
IO_CODE();
GRAPH_RDLOCK_GUARD();
if (!blk_co_is_available(blk)) {
if (!bs) {
return -ENOMEDIUM;
} else {
return bdrv_nb_sectors(bs);
}
}
return bdrv_co_nb_sectors(blk_bs(blk));
/* return 0 as number of sectors if no device present or error */
void coroutine_fn blk_co_get_geometry(BlockBackend *blk,
uint64_t *nb_sectors_ptr)
{
int64_t ret = blk_co_nb_sectors(blk);
*nb_sectors_ptr = ret < 0 ? 0 : ret;
}
/*
* This wrapper is written by hand because this function is in the hot I/O path.
*/
void coroutine_mixed_fn blk_get_geometry(BlockBackend *blk,
uint64_t *nb_sectors_ptr)
{
int64_t ret = blk_nb_sectors(blk);
*nb_sectors_ptr = ret < 0 ? 0 : ret;
}
BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,

View File

@ -259,7 +259,6 @@ static BlockDriver bdrv_copy_on_read = {
.bdrv_co_eject = cor_co_eject,
.bdrv_co_lock_medium = cor_co_lock_medium,
.has_variable_length = true,
.is_filter = true,
};

View File

@ -3743,6 +3743,12 @@ static void cdrom_parse_filename(const char *filename, QDict *options,
{
bdrv_parse_filename_strip_prefix(filename, "host_cdrom:", options);
}
static void cdrom_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl.has_variable_length = true;
raw_refresh_limits(bs, errp);
}
#endif
#ifdef __linux__
@ -3838,14 +3844,13 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_co_preadv = raw_co_preadv,
.bdrv_co_pwritev = raw_co_pwritev,
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_refresh_limits = cdrom_refresh_limits,
.bdrv_co_io_plug = raw_co_io_plug,
.bdrv_co_io_unplug = raw_co_io_unplug,
.bdrv_attach_aio_context = raw_aio_attach_aio_context,
.bdrv_co_truncate = raw_co_truncate,
.bdrv_co_getlength = raw_co_getlength,
.has_variable_length = true,
.bdrv_co_get_allocated_file_size = raw_co_get_allocated_file_size,
/* removable device support */
@ -3967,14 +3972,13 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_co_preadv = raw_co_preadv,
.bdrv_co_pwritev = raw_co_pwritev,
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_refresh_limits = cdrom_refresh_limits,
.bdrv_co_io_plug = raw_co_io_plug,
.bdrv_co_io_unplug = raw_co_io_unplug,
.bdrv_attach_aio_context = raw_aio_attach_aio_context,
.bdrv_co_truncate = raw_co_truncate,
.bdrv_co_getlength = raw_co_getlength,
.has_variable_length = true,
.bdrv_co_get_allocated_file_size = raw_co_get_allocated_file_size,
/* removable device support */

View File

@ -838,6 +838,7 @@ static void hdev_refresh_limits(BlockDriverState *bs, Error **errp)
{
/* XXX Does Windows support AIO on less than 512-byte alignment? */
bs->bl.request_alignment = 512;
bs->bl.has_variable_length = true;
}
static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
@ -933,7 +934,6 @@ static BlockDriver bdrv_host_device = {
.bdrv_attach_aio_context = raw_attach_aio_context,
.bdrv_co_getlength = raw_co_getlength,
.has_variable_length = true,
.bdrv_co_get_allocated_file_size = raw_co_get_allocated_file_size,
};

View File

@ -146,7 +146,6 @@ static BlockDriver bdrv_compress = {
.bdrv_co_eject = compress_co_eject,
.bdrv_co_lock_medium = compress_co_lock_medium,
.has_variable_length = true,
.is_filter = true,
};

View File

@ -190,6 +190,10 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
bdrv_merge_limits(&bs->bl, &c->bs->bl);
have_limits = true;
}
if (c->role & BDRV_CHILD_FILTERED) {
bs->bl.has_variable_length |= c->bs->bl.has_variable_length;
}
}
if (!have_limits) {

View File

@ -558,7 +558,6 @@ BlockDriver bdrv_preallocate_filter = {
.bdrv_set_perm = preallocate_set_perm,
.bdrv_child_perm = preallocate_child_perm,
.has_variable_length = true,
.is_filter = true,
};

View File

@ -377,6 +377,8 @@ raw_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl.has_variable_length = bs->file->bs->bl.has_variable_length;
if (bs->probed) {
/* To make it easier to protect the first sector, any probed
* image is restricted to read-modify-write on sub-sector
@ -623,7 +625,6 @@ BlockDriver bdrv_raw = {
.bdrv_co_truncate = &raw_co_truncate,
.bdrv_co_getlength = &raw_co_getlength,
.is_format = true,
.has_variable_length = true,
.bdrv_measure = &raw_measure,
.bdrv_co_get_info = &raw_co_get_info,
.bdrv_refresh_limits = &raw_refresh_limits,

View File

@ -762,7 +762,6 @@ static BlockDriver bdrv_replication = {
.is_filter = true,
.has_variable_length = true,
.strong_runtime_opts = replication_strong_runtime_opts,
};

View File

@ -981,7 +981,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
sector_write = merged_sector;
} else if (i == sectors - 1 && trailing_length) {
/* partial sector at the end of the buffer */
ret = bdrv_pread(bs->file, file_offset,
ret = bdrv_pread(bs->file, file_offset + trailing_length,
VHDX_LOG_SECTOR_SIZE - trailing_length,
merged_sector + trailing_length, 0);
if (ret < 0) {

View File

@ -79,7 +79,7 @@ bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_nb_sectors(BlockDriverState *bs);
int64_t co_wrapper_mixed_bdrv_rdlock bdrv_nb_sectors(BlockDriverState *bs);
int64_t coroutine_mixed_fn bdrv_nb_sectors(BlockDriverState *bs);
int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_getlength(BlockDriverState *bs);
int64_t co_wrapper_mixed_bdrv_rdlock bdrv_getlength(BlockDriverState *bs);
@ -90,9 +90,6 @@ int64_t co_wrapper bdrv_get_allocated_file_size(BlockDriverState *bs);
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
BlockDriverState *in_bs, Error **errp);
void coroutine_fn GRAPH_RDLOCK
bdrv_co_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
int coroutine_fn GRAPH_RDLOCK
bdrv_co_delete_file(BlockDriverState *bs, Error **errp);

View File

@ -158,8 +158,6 @@ struct BlockDriver {
*/
bool supports_backing;
bool has_variable_length;
/*
* Drivers setting this field must be able to work with just a plain
* filename with '<protocol_name>:' as a prefix, and no other options.
@ -855,6 +853,14 @@ typedef struct BlockLimits {
/* maximum number of iovec elements */
int max_iov;
/*
* true if the length of the underlying file can change, and QEMU
* is expected to adjust automatically. Mostly for CD-ROM drives,
* whose length is zero when the tray is empty (they don't need
* an explicit monitor command to load the disk inside the guest).
*/
bool has_variable_length;
} BlockLimits;
typedef struct BdrvOpBlocker BdrvOpBlocker;

View File

@ -72,11 +72,10 @@ int64_t co_wrapper_mixed blk_getlength(BlockBackend *blk);
void coroutine_fn blk_co_get_geometry(BlockBackend *blk,
uint64_t *nb_sectors_ptr);
void co_wrapper_mixed blk_get_geometry(BlockBackend *blk,
uint64_t *nb_sectors_ptr);
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk);
int64_t co_wrapper_mixed blk_nb_sectors(BlockBackend *blk);
int64_t blk_nb_sectors(BlockBackend *blk);
void *blk_try_blockalign(BlockBackend *blk, size_t size);
void *blk_blockalign(BlockBackend *blk, size_t size);

View File

@ -195,7 +195,7 @@ static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
if (sector < blk_nb_sectors(bmds->blk)) {
if (sector < bmds->total_sectors) {
return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
(1UL << (chunk % (sizeof(unsigned long) * 8))));
} else {
@ -229,10 +229,9 @@ static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
static void alloc_aio_bitmap(BlkMigDevState *bmds)
{
BlockBackend *bb = bmds->blk;
int64_t bitmap_size;
bitmap_size = blk_nb_sectors(bb) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
bitmap_size = bmds->total_sectors + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
bmds->aio_bitmap = g_malloc0(bitmap_size);

View File

@ -0,0 +1,62 @@
#!/usr/bin/env bash
# group: rw auto quick
#
# vhdx regression test: Updating the first entry of a BAT sector corrupted the
# following entries.
#
# Copyright (C) 2023 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# creator
owner=kwolf@redhat.com
seq=`basename $0`
echo "QA output created by $seq"
status=1 # failure is the default!
_cleanup()
{
_cleanup_test_img
}
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
cd ..
. ./common.rc
. ./common.filter
_supported_fmt generic
_supported_proto generic
_unsupported_imgopts "subformat=streamOptimized"
size=64M
_make_test_img $size
echo
echo "creating pattern"
$QEMU_IO -c "write -P 1 32M 4k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "write -P 2 0 4k" "$TEST_IMG" | _filter_qemu_io
$QEMU_IO -c "read -P 1 32M 4k" "$TEST_IMG" | _filter_qemu_io
echo
echo "checking image for errors"
_check_test_img
# success, all done
echo "*** done"
rm -f $seq.full
status=0

View File

@ -0,0 +1,14 @@
QA output created by regression-vhdx-log
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
creating pattern
wrote 4096/4096 bytes at offset 33554432
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
wrote 4096/4096 bytes at offset 0
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
read 4096/4096 bytes at offset 33554432
4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
checking image for errors
No errors were found on the image.
*** done