8a39b4d6e2
The raw format layer supports all flags via passthrough - but it only makes sense to pass through flags that the lower layer actually supports. The next patch gives stronger reasoning for why this is correct. At the moment, the raw format layer ignores the max_transfer limit of its protocol layer, and an attempt to do the qemu-io 'w -f 0 40m' to an NBD server that lacks FUA will pass the entire 40m request to the NBD driver, which then fragments the request itself into a 32m write, 8m write, and flush. But once the block layer starts honoring limits and fragmenting packets, the raw driver will hand the NBD driver two separate requests; if both requests have BDRV_REQ_FUA set, then this would result in a 32m write, flush, 8m write, and second flush. By having the raw layer no longer advertise FUA support when the protocol layer lacks it, we are back to a single flush at the block layer for the overall 40m request. Note that 'w -f -z 0 40m' does not currently exhibit the same problem, because there, the fragmentation does not occur until at the NBD layer (the raw layer has .bdrv_co_pwrite_zeroes, and the NBD layer doesn't advertise max_pwrite_zeroes to constrain things at the raw layer) - but the problem is latent and we would again have too many flushes without this patch once the NBD layer implements support for the new NBD_CMD_WRITE_ZEROES command, if it sets max_pwrite_zeroes to the same 32m limit as recommended by the NBD protocol. Signed-off-by: Eric Blake <eblake@redhat.com> Reviewed-by: Fam Zheng <famz@redhat.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Message-id: 1468607524-19021-3-git-send-email-eblake@redhat.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
268 lines
8.4 KiB
C
268 lines
8.4 KiB
C
/* BlockDriver implementation for "raw"
|
|
*
|
|
* Copyright (C) 2010-2016 Red Hat, Inc.
|
|
* Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
|
|
* Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
|
|
*
|
|
* Author:
|
|
* Laszlo Ersek <lersek@redhat.com>
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to
|
|
* deal in the Software without restriction, including without limitation the
|
|
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
* sell copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "block/block_int.h"
|
|
#include "qapi/error.h"
|
|
#include "qemu/option.h"
|
|
|
|
static QemuOptsList raw_create_opts = {
|
|
.name = "raw-create-opts",
|
|
.head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
|
|
.desc = {
|
|
{
|
|
.name = BLOCK_OPT_SIZE,
|
|
.type = QEMU_OPT_SIZE,
|
|
.help = "Virtual disk size"
|
|
},
|
|
{ /* end of list */ }
|
|
}
|
|
};
|
|
|
|
static int raw_reopen_prepare(BDRVReopenState *reopen_state,
|
|
BlockReopenQueue *queue, Error **errp)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Read @nb_sectors sectors starting at @sector_num into @qiov.
 *
 * Emits the BLKDBG_READ_AIO debug event on bs->file and then hands the
 * request, unchanged, to bdrv_co_readv() on bs->file.  The result of that
 * call is returned as-is.
 */
static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
                                     int nb_sectors, QEMUIOVector *qiov)
{
    int ret;

    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
    ret = bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov);

    return ret;
}
|
|
|
|
static int coroutine_fn
|
|
raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
|
|
QEMUIOVector *qiov, int flags)
|
|
{
|
|
void *buf = NULL;
|
|
BlockDriver *drv;
|
|
QEMUIOVector local_qiov;
|
|
int ret;
|
|
|
|
if (bs->probed && sector_num == 0) {
|
|
/* As long as these conditions are true, we can't get partial writes to
|
|
* the probe buffer and can just directly check the request. */
|
|
QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
|
|
QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
|
|
|
|
if (nb_sectors == 0) {
|
|
/* qemu_iovec_to_buf() would fail, but we want to return success
|
|
* instead of -EINVAL in this case. */
|
|
return 0;
|
|
}
|
|
|
|
buf = qemu_try_blockalign(bs->file->bs, 512);
|
|
if (!buf) {
|
|
ret = -ENOMEM;
|
|
goto fail;
|
|
}
|
|
|
|
ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
|
|
if (ret != 512) {
|
|
ret = -EINVAL;
|
|
goto fail;
|
|
}
|
|
|
|
drv = bdrv_probe_all(buf, 512, NULL);
|
|
if (drv != bs->drv) {
|
|
ret = -EPERM;
|
|
goto fail;
|
|
}
|
|
|
|
/* Use the checked buffer, a malicious guest might be overwriting its
|
|
* original buffer in the background. */
|
|
qemu_iovec_init(&local_qiov, qiov->niov + 1);
|
|
qemu_iovec_add(&local_qiov, buf, 512);
|
|
qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
|
|
qiov = &local_qiov;
|
|
}
|
|
|
|
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
|
|
ret = bdrv_co_pwritev(bs->file, sector_num * BDRV_SECTOR_SIZE,
|
|
nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);
|
|
|
|
fail:
|
|
if (qiov == &local_qiov) {
|
|
qemu_iovec_destroy(&local_qiov);
|
|
}
|
|
qemu_vfree(buf);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Report the status of @nb_sectors sectors starting at @sector_num.
 *
 * The whole requested range is always reported in one go: *pnum is set to
 * @nb_sectors and *file to bs->file->bs.  The return value combines
 * BDRV_BLOCK_RAW, BDRV_BLOCK_OFFSET_VALID and BDRV_BLOCK_DATA with
 * @sector_num shifted left by BDRV_SECTOR_BITS.
 */
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
                                                    int64_t sector_num,
                                                    int nb_sectors, int *pnum,
                                                    BlockDriverState **file)
{
    int64_t status = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID |
                     BDRV_BLOCK_DATA;

    *pnum = nb_sectors;
    *file = bs->file->bs;

    status |= sector_num << BDRV_SECTOR_BITS;
    return status;
}
|
|
|
|
/*
 * Write zeroes to @count bytes at @offset.
 *
 * The request, including @flags, is forwarded unchanged to
 * bdrv_co_pwrite_zeroes() on bs->file and its result is returned.
 */
static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
                                             int64_t offset, int count,
                                             BdrvRequestFlags flags)
{
    int ret = bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);

    return ret;
}
|
|
|
|
/*
 * Discard @nb_sectors sectors starting at @sector_num by forwarding the
 * request to bdrv_co_discard() on the underlying node (bs->file->bs).
 */
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
                                       int64_t sector_num, int nb_sectors)
{
    BlockDriverState *file_bs = bs->file->bs;

    return bdrv_co_discard(file_bs, sector_num, nb_sectors);
}
|
|
|
|
/*
 * Return whatever bdrv_getlength() reports for the underlying node
 * (bs->file->bs).
 */
static int64_t raw_getlength(BlockDriverState *bs)
{
    BlockDriverState *file_bs = bs->file->bs;

    return bdrv_getlength(file_bs);
}
|
|
|
|
/*
 * Fill @bdi by delegating to bdrv_get_info() on the underlying node
 * (bs->file->bs); that call's return value is passed back unchanged.
 */
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriverState *file_bs = bs->file->bs;

    return bdrv_get_info(file_bs, bdi);
}
|
|
|
|
/*
 * Forward a truncate request with the same @offset to bdrv_truncate() on
 * the underlying node (bs->file->bs) and return its result.
 */
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriverState *file_bs = bs->file->bs;

    return bdrv_truncate(file_bs, offset);
}
|
|
|
|
/*
 * Return the result of bdrv_media_changed() for the underlying node
 * (bs->file->bs).
 */
static int raw_media_changed(BlockDriverState *bs)
{
    BlockDriverState *file_bs = bs->file->bs;

    return bdrv_media_changed(file_bs);
}
|
|
|
|
/*
 * Pass an eject request, with the same @eject_flag, on to bdrv_eject()
 * for the underlying node (bs->file->bs).
 */
static void raw_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriverState *file_bs = bs->file->bs;

    bdrv_eject(file_bs, eject_flag);
}
|
|
|
|
/*
 * Pass a medium lock/unlock request, with the same @locked value, on to
 * bdrv_lock_medium() for the underlying node (bs->file->bs).
 */
static void raw_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriverState *file_bs = bs->file->bs;

    bdrv_lock_medium(file_bs, locked);
}
|
|
|
|
static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
|
|
unsigned long int req, void *buf,
|
|
BlockCompletionFunc *cb,
|
|
void *opaque)
|
|
{
|
|
return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque);
|
|
}
|
|
|
|
/*
 * Return the result of bdrv_has_zero_init() for the underlying node
 * (bs->file->bs).
 */
static int raw_has_zero_init(BlockDriverState *bs)
{
    BlockDriverState *file_bs = bs->file->bs;

    return bdrv_has_zero_init(file_bs);
}
|
|
|
|
/*
 * Create a raw image at @filename.
 *
 * All three arguments are forwarded unchanged to bdrv_create_file(), whose
 * return value is returned as-is.
 */
static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
{
    int ret = bdrv_create_file(filename, opts, errp);

    return ret;
}
|
|
|
|
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
|
|
Error **errp)
|
|
{
|
|
bs->sg = bs->file->bs->sg;
|
|
bs->supported_write_flags = BDRV_REQ_FUA &
|
|
bs->file->bs->supported_write_flags;
|
|
bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
|
|
bs->file->bs->supported_zero_flags;
|
|
|
|
if (bs->probed && !bdrv_is_read_only(bs)) {
|
|
fprintf(stderr,
|
|
"WARNING: Image format was not specified for '%s' and probing "
|
|
"guessed raw.\n"
|
|
" Automatically detecting the format is dangerous for "
|
|
"raw images, write operations on block 0 will be restricted.\n"
|
|
" Specify the 'raw' format explicitly to remove the "
|
|
"restrictions.\n",
|
|
bs->file->bs->filename);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Close callback for the raw driver.
 *
 * Deliberately empty: this function performs no teardown work.
 */
static void raw_close(BlockDriverState *bs)
{
    /* nothing to do */
}
|
|
|
|
/*
 * Probe callback for the raw driver.
 *
 * The arguments are ignored; the score is a constant 1 regardless of the
 * buffer contents or file name.
 */
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    /* The smallest possible positive score, so that raw is chosen if and
     * only if no other block driver works. */
    enum { RAW_PROBE_SCORE = 1 };

    return RAW_PROBE_SCORE;
}
|
|
|
|
/*
 * Fill @bsz by delegating to bdrv_probe_blocksizes() on the underlying
 * node (bs->file->bs); that call's return value is passed back unchanged.
 */
static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
    BlockDriverState *file_bs = bs->file->bs;

    return bdrv_probe_blocksizes(file_bs, bsz);
}
|
|
|
|
/*
 * Fill @geo by delegating to bdrv_probe_geometry() on the underlying node
 * (bs->file->bs); that call's return value is passed back unchanged.
 */
static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
    BlockDriverState *file_bs = bs->file->bs;

    return bdrv_probe_geometry(file_bs, geo);
}
|
|
|
|
BlockDriver bdrv_raw = {
|
|
.format_name = "raw",
|
|
.bdrv_probe = &raw_probe,
|
|
.bdrv_reopen_prepare = &raw_reopen_prepare,
|
|
.bdrv_open = &raw_open,
|
|
.bdrv_close = &raw_close,
|
|
.bdrv_create = &raw_create,
|
|
.bdrv_co_readv = &raw_co_readv,
|
|
.bdrv_co_writev_flags = &raw_co_writev_flags,
|
|
.bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
|
|
.bdrv_co_discard = &raw_co_discard,
|
|
.bdrv_co_get_block_status = &raw_co_get_block_status,
|
|
.bdrv_truncate = &raw_truncate,
|
|
.bdrv_getlength = &raw_getlength,
|
|
.has_variable_length = true,
|
|
.bdrv_get_info = &raw_get_info,
|
|
.bdrv_probe_blocksizes = &raw_probe_blocksizes,
|
|
.bdrv_probe_geometry = &raw_probe_geometry,
|
|
.bdrv_media_changed = &raw_media_changed,
|
|
.bdrv_eject = &raw_eject,
|
|
.bdrv_lock_medium = &raw_lock_medium,
|
|
.bdrv_aio_ioctl = &raw_aio_ioctl,
|
|
.create_opts = &raw_create_opts,
|
|
.bdrv_has_zero_init = &raw_has_zero_init
|
|
};
|
|
|
|
static void bdrv_raw_init(void)
|
|
{
|
|
bdrv_register(&bdrv_raw);
|
|
}
|
|
|
|
block_init(bdrv_raw_init);
|