qemu-e2k/include/sysemu/block-backend.h
Paolo Bonzini cc07162953 block: introduce max_hw_iov for use in scsi-generic
Linux limits the size of iovecs to 1024 (UIO_MAXIOV in the kernel
sources, IOV_MAX in POSIX).  Because of this, on some host adapters
requests with many iovecs are rejected with -EINVAL by the
io_submit() or readv()/writev() system calls.

In fact, the same limit applies to SG_IO as well.  To fix both the
EINVAL and the possible performance issues from using fewer iovecs
than allowed by Linux (some HBAs have max_segments as low as 128),
introduce a separate entry in BlockLimits to hold the max_segments
value from sysfs.  This new limit is used only for SG_IO and clamped
to bs->bl.max_iov anyway, just like max_hw_transfer is clamped to
bs->bl.max_transfer.

Reported-by: Halil Pasic <pasic@linux.ibm.com>
Cc: Hanna Reitz <hreitz@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: qemu-block@nongnu.org
Cc: qemu-stable@nongnu.org
Fixes: 18473467d5 ("file-posix: try BLKSECTGET on block devices too, do not round to power of 2", 2021-06-25)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210923130436.1187591-1-pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2021-10-06 10:25:55 +02:00

280 lines
13 KiB
C

/*
* QEMU Block backends
*
* Copyright (C) 2014-2016 Red Hat, Inc.
*
* Authors:
* Markus Armbruster <armbru@redhat.com>,
*
* This work is licensed under the terms of the GNU LGPL, version 2.1
* or later. See the COPYING.LIB file in the top-level directory.
*/
#ifndef BLOCK_BACKEND_H
#define BLOCK_BACKEND_H
#include "qemu/iov.h"
#include "block/throttle-groups.h"
/*
* TODO Have to include block/block.h for a bunch of block layer
* types. Unfortunately, this pulls in the whole BlockDriverState
* API, which we don't want used by many BlockBackend users. Some of
* the types belong here, and the rest should be split into a common
* header and one for the BlockDriverState API.
*/
#include "block/block.h"
/* Callbacks for block device models */
typedef struct BlockDevOps {
/*
* Runs when virtual media changed (monitor commands eject, change)
* Argument load is true on load and false on eject.
* Beware: doesn't run when a host device's physical media
* changes. Sure would be useful if it did.
* Device models with removable media must implement this callback.
*/
void (*change_media_cb)(void *opaque, bool load, Error **errp);
/*
* Runs when an eject request is issued from the monitor, the tray
* is closed, and the medium is locked.
* Device models that do not implement is_medium_locked will not need
* this callback. Device models that can lock the medium or tray might
* want to implement the callback and unlock the tray when "force" is
* true, even if they do not support eject requests.
*/
void (*eject_request_cb)(void *opaque, bool force);
/*
* Is the virtual tray open?
* Device models implement this only when the device has a tray.
*/
bool (*is_tray_open)(void *opaque);
/*
* Is the virtual medium locked into the device?
* Device models implement this only when device has such a lock.
*/
bool (*is_medium_locked)(void *opaque);
/*
* Runs when the size changed (e.g. monitor command block_resize)
*/
void (*resize_cb)(void *opaque);
/*
* Runs when the backend receives a drain request.
*/
void (*drained_begin)(void *opaque);
/*
* Runs when the backend's last drain request ends.
*/
void (*drained_end)(void *opaque);
/*
* Is the device still busy?
*/
bool (*drained_poll)(void *opaque);
} BlockDevOps;
/* This struct is embedded in (the private) BlockBackend struct and contains
* fields that must be public. This is in particular for QLIST_ENTRY() and
* friends so that BlockBackends can be kept in lists outside block-backend.c
* */
typedef struct BlockBackendPublic {
ThrottleGroupMember throttle_group_member;
} BlockBackendPublic;
BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, Error **errp);
BlockBackend *blk_new_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp);
int blk_get_refcnt(BlockBackend *blk);
void blk_ref(BlockBackend *blk);
void blk_unref(BlockBackend *blk);
void blk_remove_all_bs(void);
const char *blk_name(const BlockBackend *blk);
BlockBackend *blk_by_name(const char *name);
BlockBackend *blk_next(BlockBackend *blk);
BlockBackend *blk_all_next(BlockBackend *blk);
bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
void monitor_remove_blk(BlockBackend *blk);
BlockBackendPublic *blk_get_public(BlockBackend *blk);
BlockBackend *blk_by_public(BlockBackendPublic *public);
BlockDriverState *blk_bs(BlockBackend *blk);
void blk_remove_bs(BlockBackend *blk);
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
bool bdrv_has_blk(BlockDriverState *bs);
bool bdrv_is_root_node(BlockDriverState *bs);
int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
Error **errp);
void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
void blk_iostatus_enable(BlockBackend *blk);
bool blk_iostatus_is_enabled(const BlockBackend *blk);
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
void blk_iostatus_disable(BlockBackend *blk);
void blk_iostatus_reset(BlockBackend *blk);
void blk_iostatus_set_err(BlockBackend *blk, int error);
int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
DeviceState *blk_get_attached_dev(BlockBackend *blk);
char *blk_get_attached_dev_id(BlockBackend *blk);
BlockBackend *blk_by_dev(void *dev);
BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
unsigned int bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags);
int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
unsigned int bytes, void *buf,
BdrvRequestFlags flags)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
return blk_co_preadv(blk, offset, bytes, &qiov, flags);
}
static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
unsigned int bytes, void *buf,
BdrvRequestFlags flags)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
}
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int bytes, BdrvRequestFlags flags);
BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int bytes, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque);
int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes);
int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
BdrvRequestFlags flags);
int64_t blk_getlength(BlockBackend *blk);
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
int64_t blk_nb_sectors(BlockBackend *blk);
BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int bytes,
BlockCompletionFunc *cb, void *opaque);
void blk_aio_cancel(BlockAIOCB *acb);
void blk_aio_cancel_async(BlockAIOCB *acb);
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque);
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes);
int blk_co_flush(BlockBackend *blk);
int blk_flush(BlockBackend *blk);
int blk_commit_all(void);
void blk_inc_in_flight(BlockBackend *blk);
void blk_dec_in_flight(BlockBackend *blk);
void blk_drain(BlockBackend *blk);
void blk_drain_all(void);
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
BlockdevOnError on_write_error);
BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
int error);
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool is_read, int error);
bool blk_supports_write_perm(BlockBackend *blk);
bool blk_is_writable(BlockBackend *blk);
bool blk_is_sg(BlockBackend *blk);
bool blk_enable_write_cache(BlockBackend *blk);
void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
void blk_invalidate_cache(BlockBackend *blk, Error **errp);
bool blk_is_inserted(BlockBackend *blk);
bool blk_is_available(BlockBackend *blk);
void blk_lock_medium(BlockBackend *blk, bool locked);
void blk_eject(BlockBackend *blk, bool eject_flag);
int blk_get_flags(BlockBackend *blk);
uint32_t blk_get_request_alignment(BlockBackend *blk);
uint32_t blk_get_max_transfer(BlockBackend *blk);
uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
int blk_get_max_iov(BlockBackend *blk);
int blk_get_max_hw_iov(BlockBackend *blk);
void blk_set_guest_block_size(BlockBackend *blk, int align);
void *blk_try_blockalign(BlockBackend *blk, size_t size);
void *blk_blockalign(BlockBackend *blk, size_t size);
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
void blk_op_block_all(BlockBackend *blk, Error *reason);
void blk_op_unblock_all(BlockBackend *blk, Error *reason);
AioContext *blk_get_aio_context(BlockBackend *blk);
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
Error **errp);
void blk_add_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque);
void blk_remove_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *,
void *),
void (*detach_aio_context)(void *),
void *opaque);
void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify);
void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify);
void blk_io_plug(BlockBackend *blk);
void blk_io_unplug(BlockBackend *blk);
BlockAcctStats *blk_get_stats(BlockBackend *blk);
BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
void blk_update_root_state(BlockBackend *blk);
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
int blk_get_open_flags_from_root_state(BlockBackend *blk);
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque);
int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int bytes, BdrvRequestFlags flags);
int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
int bytes);
int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes);
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size);
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size);
int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz);
int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo);
BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
BlockCompletionFunc *cb,
void *opaque, int ret);
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg);
void blk_io_limits_disable(BlockBackend *blk);
void blk_io_limits_enable(BlockBackend *blk, const char *group);
void blk_io_limits_update_group(BlockBackend *blk, const char *group);
void blk_set_force_allow_inactivate(BlockBackend *blk);
void blk_register_buf(BlockBackend *blk, void *host, size_t size);
void blk_unregister_buf(BlockBackend *blk, void *host);
int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
BlockBackend *blk_out, int64_t off_out,
int bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
const BdrvChild *blk_root(BlockBackend *blk);
int blk_make_empty(BlockBackend *blk, Error **errp);
#endif