Pull request

v2:
  * Removed clang-format call from scripts/block-coroutine-wrapper.py. This
    avoids the issue with clang version incompatibility. It could be added back
    in the future but the code is readable without reformatting and it also
    makes the build less dependent on the environment.
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEhpWov9P5fNqsNXdanKSrs4Grc8gFAl97PuQACgkQnKSrs4Gr
 c8hTSQf/XMXm817fZxhiHssHisuh4c2TB1kUdDZPz9zr1sUg7fuBJEATtXGi9mYx
 cLHpDZWFesZEB19tmg9bTu1U5h+9/KAr4KKUWQSAhUPzFx6eQTz+/MOssLfGTyOT
 i7lBsDkA0czI0v1SxXx2bJNGQkOijQ2gpXVk/RumpCqPdH68p+l+rCbu9mNFr57/
 bjP6jiGbLpUBAfVwTHWkdyJrubwTrN9ni2RFRYb6GYs5qlO3yhVOo8Xs0zVkERkP
 YqNrLsIXtzxomc82Yenv11bmsK//kTPyeUsNl5DP8J1s9l6U2RCMo1IMcFa4/pbe
 D3wCPADfHQ4kaQpNrNsG64scSDkDpg==
 =AcXm
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/stefanha-gitlab/tags/block-pull-request' into staging

Pull request

v2:
 * Removed clang-format call from scripts/block-coroutine-wrapper.py. This
   avoids the issue with clang version incompatibility. It could be added back
   in the future but the code is readable without reformatting and it also
   makes the build less dependent on the environment.

# gpg: Signature made Mon 05 Oct 2020 16:42:28 BST
# gpg:                using RSA key 8695A8BFD3F97CDAAC35775A9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>" [full]
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>" [full]
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35  775A 9CA4 ABB3 81AB 73C8

* remotes/stefanha-gitlab/tags/block-pull-request:
  util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved regions
  util/vfio-helpers: Collect IOVA reserved regions
  docs: add 'io_uring' option to 'aio' param in qemu-options.hx
  include/block/block.h: drop non-ascii quotation mark
  block/io: refactor save/load vmstate
  block: drop bdrv_prwv
  block: generate coroutine-wrapper code
  scripts: add block-coroutine-wrapper.py
  block: declare some coroutine functions in block/coroutines.h
  block/io: refactor coroutine wrappers
  block: return error-code from bdrv_invalidate_cache
  block/nvme: Replace magic value by SCALE_MS definition
  block/nvme: Use register definitions from 'block/nvme.h'
  block/nvme: Drop NVMeRegs structure, directly use NvmeBar
  block/nvme: Reduce I/O registers scope
  block/nvme: Map doorbells pages write-only
  util/vfio-helpers: Pass page protections to qemu_vfio_pci_map_bar()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2020-10-06 12:15:59 +01:00
commit d7c5b78829
14 changed files with 607 additions and 427 deletions

97
block.c
View File

@ -48,6 +48,7 @@
#include "qemu/timer.h"
#include "qemu/cutils.h"
#include "qemu/id.h"
#include "block/coroutines.h"
#ifdef CONFIG_BSD
#include <sys/ioctl.h>
@ -4676,8 +4677,8 @@ static void bdrv_delete(BlockDriverState *bs)
* free of errors) or -errno when an internal error occurred. The results of the
* check are stored in res.
*/
static int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix)
int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix)
{
if (bs->drv == NULL) {
return -ENOMEDIUM;
@ -4690,43 +4691,6 @@ static int coroutine_fn bdrv_co_check(BlockDriverState *bs,
return bs->drv->bdrv_co_check(bs, res, fix);
}
typedef struct CheckCo {
BlockDriverState *bs;
BdrvCheckResult *res;
BdrvCheckMode fix;
int ret;
} CheckCo;
static void coroutine_fn bdrv_check_co_entry(void *opaque)
{
CheckCo *cco = opaque;
cco->ret = bdrv_co_check(cco->bs, cco->res, cco->fix);
aio_wait_kick();
}
int bdrv_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix)
{
Coroutine *co;
CheckCo cco = {
.bs = bs,
.res = res,
.ret = -EINPROGRESS,
.fix = fix,
};
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_check_co_entry(&cco);
} else {
co = qemu_coroutine_create(bdrv_check_co_entry, &cco);
bdrv_coroutine_enter(bs, co);
BDRV_POLL_WHILE(bs, cco.ret == -EINPROGRESS);
}
return cco.ret;
}
/*
* Return values:
* 0 - success
@ -5781,8 +5745,7 @@ void bdrv_init_with_whitelist(void)
bdrv_init();
}
static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
Error **errp)
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
{
BdrvChild *child, *parent;
uint64_t perm, shared_perm;
@ -5791,14 +5754,14 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
BdrvDirtyBitmap *bm;
if (!bs->drv) {
return;
return -ENOMEDIUM;
}
QLIST_FOREACH(child, &bs->children, next) {
bdrv_co_invalidate_cache(child->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
return -EINVAL;
}
}
@ -5821,7 +5784,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
ret = bdrv_check_perm(bs, NULL, perm, shared_perm, NULL, NULL, errp);
if (ret < 0) {
bs->open_flags |= BDRV_O_INACTIVE;
return;
return ret;
}
bdrv_set_perm(bs, perm, shared_perm);
@ -5830,7 +5793,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
if (local_err) {
bs->open_flags |= BDRV_O_INACTIVE;
error_propagate(errp, local_err);
return;
return -EINVAL;
}
}
@ -5842,7 +5805,7 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
if (ret < 0) {
bs->open_flags |= BDRV_O_INACTIVE;
error_setg_errno(errp, -ret, "Could not refresh total sector count");
return;
return ret;
}
}
@ -5852,59 +5815,27 @@ static void coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs,
if (local_err) {
bs->open_flags |= BDRV_O_INACTIVE;
error_propagate(errp, local_err);
return;
return -EINVAL;
}
}
}
}
typedef struct InvalidateCacheCo {
BlockDriverState *bs;
Error **errp;
bool done;
} InvalidateCacheCo;
static void coroutine_fn bdrv_invalidate_cache_co_entry(void *opaque)
{
InvalidateCacheCo *ico = opaque;
bdrv_co_invalidate_cache(ico->bs, ico->errp);
ico->done = true;
aio_wait_kick();
}
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
Coroutine *co;
InvalidateCacheCo ico = {
.bs = bs,
.done = false,
.errp = errp
};
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_invalidate_cache_co_entry(&ico);
} else {
co = qemu_coroutine_create(bdrv_invalidate_cache_co_entry, &ico);
bdrv_coroutine_enter(bs, co);
BDRV_POLL_WHILE(bs, !ico.done);
}
return 0;
}
void bdrv_invalidate_cache_all(Error **errp)
{
BlockDriverState *bs;
Error *local_err = NULL;
BdrvNextIterator it;
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
int ret;
aio_context_acquire(aio_context);
bdrv_invalidate_cache(bs, &local_err);
ret = bdrv_invalidate_cache(bs, errp);
aio_context_release(aio_context);
if (local_err) {
error_propagate(errp, local_err);
if (ret < 0) {
bdrv_next_cleanup(&it);
return;
}

49
block/block-gen.h Normal file
View File

@ -0,0 +1,49 @@
/*
* Block coroutine wrapping core, used by auto-generated block/block-gen.c
*
* Copyright (c) 2003 Fabrice Bellard
* Copyright (c) 2020 Virtuozzo International GmbH
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef BLOCK_BLOCK_GEN_H
#define BLOCK_BLOCK_GEN_H
#include "block/block_int.h"
/* Base structure for argument packing structures */
typedef struct BdrvPollCo {
BlockDriverState *bs;
bool in_progress;
int ret;
Coroutine *co; /* Keep pointer here for debugging */
} BdrvPollCo;
static inline int bdrv_poll_co(BdrvPollCo *s)
{
assert(!qemu_in_coroutine());
bdrv_coroutine_enter(s->bs, s->co);
BDRV_POLL_WHILE(s->bs, s->in_progress);
return s->ret;
}
#endif /* BLOCK_BLOCK_GEN_H */

65
block/coroutines.h Normal file
View File

@ -0,0 +1,65 @@
/*
* Block layer I/O functions
*
* Copyright (c) 2003 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef BLOCK_COROUTINES_INT_H
#define BLOCK_COROUTINES_INT_H
#include "block/block_int.h"
int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix);
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
int generated_co_wrapper
bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags);
int generated_co_wrapper
bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags);
int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool want_zero,
int64_t offset,
int64_t bytes,
int64_t *pnum,
int64_t *map,
BlockDriverState **file);
int generated_co_wrapper
bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool want_zero,
int64_t offset,
int64_t bytes,
int64_t *pnum,
int64_t *map,
BlockDriverState **file);
int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
QEMUIOVector *qiov, int64_t pos);
int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
QEMUIOVector *qiov, int64_t pos);
#endif /* BLOCK_COROUTINES_INT_H */

View File

@ -29,6 +29,7 @@
#include "block/blockjob.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "block/coroutines.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
@ -889,89 +890,10 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
return 0;
}
typedef int coroutine_fn BdrvRequestEntry(void *opaque);
typedef struct BdrvRunCo {
BdrvRequestEntry *entry;
void *opaque;
int ret;
bool done;
Coroutine *co; /* Coroutine, running bdrv_run_co_entry, for debugging */
} BdrvRunCo;
static void coroutine_fn bdrv_run_co_entry(void *opaque)
{
BdrvRunCo *arg = opaque;
arg->ret = arg->entry(arg->opaque);
arg->done = true;
aio_wait_kick();
}
static int bdrv_run_co(BlockDriverState *bs, BdrvRequestEntry *entry,
void *opaque)
{
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
return entry(opaque);
} else {
BdrvRunCo s = { .entry = entry, .opaque = opaque };
s.co = qemu_coroutine_create(bdrv_run_co_entry, &s);
bdrv_coroutine_enter(bs, s.co);
BDRV_POLL_WHILE(bs, !s.done);
return s.ret;
}
}
typedef struct RwCo {
BdrvChild *child;
int64_t offset;
QEMUIOVector *qiov;
bool is_write;
BdrvRequestFlags flags;
} RwCo;
static int coroutine_fn bdrv_rw_co_entry(void *opaque)
{
RwCo *rwco = opaque;
if (!rwco->is_write) {
return bdrv_co_preadv(rwco->child, rwco->offset,
rwco->qiov->size, rwco->qiov,
rwco->flags);
} else {
return bdrv_co_pwritev(rwco->child, rwco->offset,
rwco->qiov->size, rwco->qiov,
rwco->flags);
}
}
/*
* Process a vectored synchronous request using coroutines
*/
static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
QEMUIOVector *qiov, bool is_write,
BdrvRequestFlags flags)
{
RwCo rwco = {
.child = child,
.offset = offset,
.qiov = qiov,
.is_write = is_write,
.flags = flags,
};
return bdrv_run_co(child->bs, bdrv_rw_co_entry, &rwco);
}
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, bytes);
return bdrv_prwv_co(child, offset, &qiov, true,
return bdrv_pwritev(child, offset, bytes, NULL,
BDRV_REQ_ZERO_WRITE | flags);
}
@ -1016,41 +938,19 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
}
}
/* return < 0 if error. See bdrv_pwrite() for the return codes */
int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
int ret;
ret = bdrv_prwv_co(child, offset, qiov, false, 0);
if (ret < 0) {
return ret;
}
return qiov->size;
}
/* See bdrv_pwrite() for the return codes */
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
if (bytes < 0) {
return -EINVAL;
}
return bdrv_preadv(child, offset, &qiov);
}
ret = bdrv_preadv(child, offset, bytes, &qiov, 0);
int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
int ret;
ret = bdrv_prwv_co(child, offset, qiov, true, 0);
if (ret < 0) {
return ret;
}
return qiov->size;
return ret < 0 ? ret : bytes;
}
/* Return no. of bytes on success or < 0 on error. Important errors are:
@ -1061,13 +961,16 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
*/
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
if (bytes < 0) {
return -EINVAL;
}
return bdrv_pwritev(child, offset, &qiov);
ret = bdrv_pwritev(child, offset, bytes, &qiov, 0);
return ret < 0 ? ret : bytes;
}
/*
@ -2243,18 +2146,6 @@ int bdrv_flush_all(void)
return result;
}
typedef struct BdrvCoBlockStatusData {
BlockDriverState *bs;
BlockDriverState *base;
bool want_zero;
int64_t offset;
int64_t bytes;
int64_t *pnum;
int64_t *map;
BlockDriverState **file;
} BdrvCoBlockStatusData;
/*
* Returns the allocation status of the specified sectors.
* Drivers not implementing the functionality are assumed to not support
@ -2449,14 +2340,15 @@ early_out:
return ret;
}
static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool want_zero,
int64_t offset,
int64_t bytes,
int64_t *pnum,
int64_t *map,
BlockDriverState **file)
int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool want_zero,
int64_t offset,
int64_t bytes,
int64_t *pnum,
int64_t *map,
BlockDriverState **file)
{
BlockDriverState *p;
int ret = 0;
@ -2489,43 +2381,6 @@ static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
return ret;
}
/* Coroutine wrapper for bdrv_block_status_above() */
static int coroutine_fn bdrv_block_status_above_co_entry(void *opaque)
{
BdrvCoBlockStatusData *data = opaque;
return bdrv_co_block_status_above(data->bs, data->base,
data->want_zero,
data->offset, data->bytes,
data->pnum, data->map, data->file);
}
/*
* Synchronous wrapper around bdrv_co_block_status_above().
*
* See bdrv_co_block_status_above() for details.
*/
static int bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool want_zero, int64_t offset,
int64_t bytes, int64_t *pnum,
int64_t *map,
BlockDriverState **file)
{
BdrvCoBlockStatusData data = {
.bs = bs,
.base = base,
.want_zero = want_zero,
.offset = offset,
.bytes = bytes,
.pnum = pnum,
.map = map,
.file = file,
};
return bdrv_run_co(bs, bdrv_block_status_above_co_entry, &data);
}
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
@ -2619,96 +2474,70 @@ int bdrv_is_allocated_above(BlockDriverState *top,
return 0;
}
typedef struct BdrvVmstateCo {
BlockDriverState *bs;
QEMUIOVector *qiov;
int64_t pos;
bool is_read;
} BdrvVmstateCo;
static int coroutine_fn
bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read)
int coroutine_fn
bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret = -ENOTSUP;
if (!drv) {
return -ENOMEDIUM;
}
bdrv_inc_in_flight(bs);
if (!drv) {
ret = -ENOMEDIUM;
} else if (drv->bdrv_load_vmstate) {
if (is_read) {
ret = drv->bdrv_load_vmstate(bs, qiov, pos);
} else {
ret = drv->bdrv_save_vmstate(bs, qiov, pos);
}
if (drv->bdrv_load_vmstate) {
ret = drv->bdrv_load_vmstate(bs, qiov, pos);
} else if (child_bs) {
ret = bdrv_co_rw_vmstate(child_bs, qiov, pos, is_read);
ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
}
bdrv_dec_in_flight(bs);
return ret;
}
static int coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
int coroutine_fn
bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
BdrvVmstateCo *co = opaque;
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret = -ENOTSUP;
return bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
}
if (!drv) {
return -ENOMEDIUM;
}
static inline int
bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read)
{
BdrvVmstateCo data = {
.bs = bs,
.qiov = qiov,
.pos = pos,
.is_read = is_read,
};
bdrv_inc_in_flight(bs);
return bdrv_run_co(bs, bdrv_co_rw_vmstate_entry, &data);
if (drv->bdrv_save_vmstate) {
ret = drv->bdrv_save_vmstate(bs, qiov, pos);
} else if (child_bs) {
ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
}
bdrv_dec_in_flight(bs);
return ret;
}
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
int ret;
int ret = bdrv_writev_vmstate(bs, &qiov, pos);
ret = bdrv_writev_vmstate(bs, &qiov, pos);
if (ret < 0) {
return ret;
}
return size;
}
int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
return bdrv_rw_vmstate(bs, qiov, pos, false);
return ret < 0 ? ret : size;
}
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
int64_t pos, int size)
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
int ret;
int ret = bdrv_readv_vmstate(bs, &qiov, pos);
ret = bdrv_readv_vmstate(bs, &qiov, pos);
if (ret < 0) {
return ret;
}
return size;
}
int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
return bdrv_rw_vmstate(bs, qiov, pos, true);
return ret < 0 ? ret : size;
}
/**************************************************************/
@ -2748,11 +2577,6 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb)
/**************************************************************/
/* Coroutine block device emulation */
static int coroutine_fn bdrv_flush_co_entry(void *opaque)
{
return bdrv_co_flush(opaque);
}
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
BdrvChild *primary_child = bdrv_primary_child(bs);
@ -2876,24 +2700,6 @@ early_exit:
return ret;
}
int bdrv_flush(BlockDriverState *bs)
{
return bdrv_run_co(bs, bdrv_flush_co_entry, bs);
}
typedef struct DiscardCo {
BdrvChild *child;
int64_t offset;
int64_t bytes;
} DiscardCo;
static int coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
{
DiscardCo *rwco = opaque;
return bdrv_co_pdiscard(rwco->child, rwco->offset, rwco->bytes);
}
int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
int64_t bytes)
{
@ -3008,17 +2814,6 @@ out:
return ret;
}
int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes)
{
DiscardCo rwco = {
.child = child,
.offset = offset,
.bytes = bytes,
};
return bdrv_run_co(child->bs, bdrv_pdiscard_co_entry, &rwco);
}
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
{
BlockDriver *drv = bs->drv;
@ -3420,35 +3215,3 @@ out:
return ret;
}
typedef struct TruncateCo {
BdrvChild *child;
int64_t offset;
bool exact;
PreallocMode prealloc;
BdrvRequestFlags flags;
Error **errp;
} TruncateCo;
static int coroutine_fn bdrv_truncate_co_entry(void *opaque)
{
TruncateCo *tco = opaque;
return bdrv_co_truncate(tco->child, tco->offset, tco->exact,
tco->prealloc, tco->flags, tco->errp);
}
int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
{
TruncateCo tco = {
.child = child,
.offset = offset,
.exact = exact,
.prealloc = prealloc,
.flags = flags,
.errp = errp,
};
return bdrv_run_co(child->bs, bdrv_truncate_co_entry, &tco);
}

View File

@ -107,6 +107,14 @@ module_block_h = custom_target('module_block.h',
command: [module_block_py, '@OUTPUT0@', modsrc])
block_ss.add(module_block_h)
wrapper_py = find_program('../scripts/block-coroutine-wrapper.py')
block_gen_c = custom_target('block-gen.c',
output: 'block-gen.c',
input: files('../include/block/block.h',
'coroutines.h'),
command: [wrapper_py, '@OUTPUT@', '@INPUT@'])
block_ss.add(block_gen_c)
block_ss.add(files('stream.c'))
softmmu_ss.add(files('qapi-sysemu.c'))

View File

@ -31,7 +31,7 @@
#define NVME_SQ_ENTRY_BYTES 64
#define NVME_CQ_ENTRY_BYTES 16
#define NVME_QUEUE_SIZE 128
#define NVME_BAR_SIZE 8192
#define NVME_DOORBELL_SIZE 4096
/*
* We have to leave one slot empty as that is the full queue case where
@ -81,15 +81,6 @@ typedef struct {
QEMUBH *completion_bh;
} NVMeQueuePair;
/* Memory mapped registers */
typedef volatile struct {
NvmeBar ctrl;
struct {
uint32_t sq_tail;
uint32_t cq_head;
} doorbells[];
} NVMeRegs;
#define INDEX_ADMIN 0
#define INDEX_IO(n) (1 + n)
@ -102,7 +93,11 @@ enum {
struct BDRVNVMeState {
AioContext *aio_context;
QEMUVFIOState *vfio;
NVMeRegs *regs;
/* Memory mapped registers */
volatile struct {
uint32_t sq_tail;
uint32_t cq_head;
} *doorbells;
/* The submission/completion queue pairs.
* [0]: admin queue.
* [1..]: io queues.
@ -247,14 +242,14 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
error_propagate(errp, local_err);
goto fail;
}
q->sq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].sq_tail;
q->sq.doorbell = &s->doorbells[idx * s->doorbell_scale].sq_tail;
nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto fail;
}
q->cq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].cq_head;
q->cq.doorbell = &s->doorbells[idx * s->doorbell_scale].cq_head;
return q;
fail:
@ -694,6 +689,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
uint64_t timeout_ms;
uint64_t deadline, now;
Error *local_err = NULL;
volatile NvmeBar *regs = NULL;
qemu_co_mutex_init(&s->dma_map_lock);
qemu_co_queue_init(&s->dma_flush_queue);
@ -712,32 +708,32 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
goto out;
}
s->regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, NVME_BAR_SIZE, errp);
if (!s->regs) {
regs = qemu_vfio_pci_map_bar(s->vfio, 0, 0, sizeof(NvmeBar),
PROT_READ | PROT_WRITE, errp);
if (!regs) {
ret = -EINVAL;
goto out;
}
/* Perform initialize sequence as described in NVMe spec "7.6.1
* Initialization". */
cap = le64_to_cpu(s->regs->ctrl.cap);
if (!(cap & (1ULL << 37))) {
cap = le64_to_cpu(regs->cap);
if (!NVME_CAP_CSS(cap)) {
error_setg(errp, "Device doesn't support NVMe command set");
ret = -EINVAL;
goto out;
}
s->page_size = MAX(4096, 1 << (12 + ((cap >> 48) & 0xF)));
s->doorbell_scale = (4 << (((cap >> 32) & 0xF))) / sizeof(uint32_t);
s->page_size = MAX(4096, 1 << NVME_CAP_MPSMIN(cap));
s->doorbell_scale = (4 << NVME_CAP_DSTRD(cap)) / sizeof(uint32_t);
bs->bl.opt_mem_alignment = s->page_size;
timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000);
timeout_ms = MIN(500 * NVME_CAP_TO(cap), 30000);
/* Reset device to get a clean state. */
s->regs->ctrl.cc = cpu_to_le32(le32_to_cpu(s->regs->ctrl.cc) & 0xFE);
regs->cc = cpu_to_le32(le32_to_cpu(regs->cc) & 0xFE);
/* Wait for CSTS.RDY = 0. */
deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
while (le32_to_cpu(s->regs->ctrl.csts) & 0x1) {
while (NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to reset (%"
PRId64 " ms)",
@ -747,6 +743,13 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
}
}
s->doorbells = qemu_vfio_pci_map_bar(s->vfio, 0, sizeof(NvmeBar),
NVME_DOORBELL_SIZE, PROT_WRITE, errp);
if (!s->doorbells) {
ret = -EINVAL;
goto out;
}
/* Set up admin queue. */
s->queues = g_new(NVMeQueuePair *, 1);
s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0,
@ -758,18 +761,19 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
}
s->nr_queues = 1;
QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
s->regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
s->regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
s->regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << AQA_ACQS_SHIFT) |
(NVME_QUEUE_SIZE << AQA_ASQS_SHIFT));
regs->asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
regs->acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
/* After setting up all control registers we can enable device now. */
s->regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
(ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
0x1);
regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << CC_IOCQES_SHIFT) |
(ctz32(NVME_SQ_ENTRY_BYTES) << CC_IOSQES_SHIFT) |
CC_EN_MASK);
/* Wait for CSTS.RDY = 1. */
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
deadline = now + timeout_ms * 1000000;
while (!(le32_to_cpu(s->regs->ctrl.csts) & 0x1)) {
deadline = now + timeout_ms * SCALE_MS;
while (!NVME_CSTS_RDY(le32_to_cpu(regs->csts))) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to start (%"
PRId64 " ms)",
@ -800,6 +804,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
ret = -EIO;
}
out:
if (regs) {
qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)regs, 0, sizeof(NvmeBar));
}
/* Cleaning up is done in nvme_file_open() upon error. */
return ret;
}
@ -872,7 +880,8 @@ static void nvme_close(BlockDriverState *bs)
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
false, NULL, NULL);
event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE);
qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->doorbells,
sizeof(NvmeBar), NVME_DOORBELL_SIZE);
qemu_vfio_close(s->vfio);
g_free(s->device);

View File

@ -0,0 +1,54 @@
=======================
block-coroutine-wrapper
=======================
A lot of functions in QEMU block layer (see ``block/*``) can only be
called in coroutine context. Such functions are normally marked by the
coroutine_fn specifier. Still, sometimes we need to call them from
non-coroutine context; for this we need to start a coroutine, run the
needed function from it and wait for the coroutine to finish in a
BDRV_POLL_WHILE() loop. To run a coroutine we need a function with one
void* argument. So for each coroutine_fn function which needs a
non-coroutine interface, we should define a structure to pack the
parameters, define a separate function to unpack the parameters and
call the original function and finally define a new interface function
with same list of arguments as original one, which will pack the
parameters into a struct, create a coroutine, run it and wait in
BDRV_POLL_WHILE() loop. It's boring to create such wrappers by hand,
so we have a script to generate them.
Usage
=====
Assume we have defined the ``coroutine_fn`` function
``bdrv_co_foo(<some args>)`` and need a non-coroutine interface for it,
called ``bdrv_foo(<same args>)``. In this case the script can help. To
trigger the generation:
1. You need ``bdrv_foo`` declaration somewhere (for example, in
``block/coroutines.h``) with the ``generated_co_wrapper`` mark,
like this:
.. code-block:: c
int generated_co_wrapper bdrv_foo(<some args>);
2. You need to feed this declaration to block-coroutine-wrapper script.
For this, add the .h (or .c) file with the declaration to the
``input: files(...)`` list of ``block_gen_c`` target declaration in
``block/meson.build``
You are done. During the build, coroutine wrappers will be generated in
``<BUILD_DIR>/block/block-gen.c``.
Links
=====
1. The script location is ``scripts/block-coroutine-wrapper.py``.
2. Generic place for private ``generated_co_wrapper`` declarations is
``block/coroutines.h``, for public declarations:
``include/block/block.h``
3. The core API of generated coroutine wrappers is placed in
(not generated) ``block/block-gen.h``

View File

@ -32,3 +32,4 @@ Contents:
s390-dasd-ipl
clocks
qom
block-coroutine-wrapper

View File

@ -10,6 +10,16 @@
#include "block/blockjob.h"
#include "qemu/hbitmap.h"
/*
* generated_co_wrapper
*
* Function specifier, which does nothing but mark functions to be
* generated by scripts/block-coroutine-wrapper.py
*
* Read more in docs/devel/block-coroutine-wrapper.rst
*/
#define generated_co_wrapper
/* block.c */
typedef struct BlockDriver BlockDriver;
typedef struct BdrvChild BdrvChild;
@ -294,7 +304,7 @@ enum BdrvChildRoleBits {
BDRV_CHILD_FILTERED = (1 << 2),
/*
* Child from which to read all data that isnt allocated in the
* Child from which to read all data that isn't allocated in the
* parent (i.e., the backing child); such data is copied to the
* parent through COW (and optionally COR).
* This field is mutually exclusive with DATA, METADATA, and
@ -373,9 +383,7 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags);
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes);
int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes);
int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
const void *buf, int count);
/*
@ -393,8 +401,9 @@ void bdrv_refresh_filename(BlockDriverState *bs);
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags,
Error **errp);
int bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
int generated_co_wrapper
bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
int64_t bdrv_nb_sectors(BlockDriverState *bs);
int64_t bdrv_getlength(BlockDriverState *bs);
@ -436,7 +445,8 @@ typedef enum {
BDRV_FIX_ERRORS = 2,
} BdrvCheckMode;
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
/* The units of offset and total_work_size may be chosen arbitrarily by the
* block driver; total_work_size may change during the course of the amendment
@ -460,12 +470,13 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb);
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
/* Invalidate any cached metadata used by image formats */
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
Error **errp);
void bdrv_invalidate_cache_all(Error **errp);
int bdrv_inactivate_all(void);
/* Ensure contents are flushed to disk. */
int bdrv_flush(BlockDriverState *bs);
int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
int bdrv_flush_all(void);
void bdrv_close_all(void);
@ -480,7 +491,8 @@ void bdrv_drain_all(void);
AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
cond); })
int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
int64_t bytes);
int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
int bdrv_has_zero_init_1(BlockDriverState *bs);
int bdrv_has_zero_init(BlockDriverState *bs);
@ -560,8 +572,10 @@ int path_has_protocol(const char *path);
int path_is_absolute(const char *path);
char *path_combine(const char *base_path, const char *filename);
int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int generated_co_wrapper
bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int generated_co_wrapper
bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size);

View File

@ -22,7 +22,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s);
void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host);
void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
uint64_t offset, uint64_t size,
uint64_t offset, uint64_t size, int prot,
Error **errp);
void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
uint64_t offset, uint64_t size);

View File

@ -1053,7 +1053,8 @@ SRST
The path to the image file in the local filesystem
``aio``
Specifies the AIO backend (threads/native, default: threads)
Specifies the AIO backend (threads/native/io_uring,
default: threads)
``locking``
Specifies whether the image file is protected with Linux OFD
@ -1175,7 +1176,8 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
"-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
" [,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
" [,snapshot=on|off][,rerror=ignore|stop|report]\n"
" [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
" [,werror=ignore|stop|report|enospc][,id=name]\n"
" [,aio=threads|native|io_uring]\n"
" [,readonly=on|off][,copy-on-read=on|off]\n"
" [,discard=ignore|unmap][,detect-zeroes=on|off|unmap]\n"
" [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
@ -1247,8 +1249,8 @@ SRST
The default mode is ``cache=writeback``.
``aio=aio``
aio is "threads", or "native" and selects between pthread based
disk I/O and native Linux AIO.
aio is "threads", "native", or "io_uring" and selects between pthread
based disk I/O, native Linux AIO, or Linux io_uring API.
``format=format``
Specify which disk format will be used rather than detecting the

View File

@ -0,0 +1,167 @@
#! /usr/bin/env python3
"""Generate coroutine wrappers for block subsystem.
The program parses one or several concatenated c files from stdin,
searches for functions with the 'generated_co_wrapper' specifier
and generates corresponding wrappers on stdout.
Usage: block-coroutine-wrapper.py generated-file.c FILE.[ch]...
Copyright (c) 2020 Virtuozzo International GmbH.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import sys
import re
from typing import Iterator
def gen_header():
copyright = re.sub('^.*Copyright', 'Copyright', __doc__, flags=re.DOTALL)
copyright = re.sub('^(?=.)', ' * ', copyright.strip(), flags=re.MULTILINE)
copyright = re.sub('^$', ' *', copyright, flags=re.MULTILINE)
return f"""\
/*
* File is generated by scripts/block-coroutine-wrapper.py
*
{copyright}
*/
#include "qemu/osdep.h"
#include "block/coroutines.h"
#include "block/block-gen.h"
#include "block/block_int.h"\
"""
class ParamDecl:
param_re = re.compile(r'(?P<decl>'
r'(?P<type>.*[ *])'
r'(?P<name>[a-z][a-z0-9_]*)'
r')')
def __init__(self, param_decl: str) -> None:
m = self.param_re.match(param_decl.strip())
if m is None:
raise ValueError(f'Wrong parameter declaration: "{param_decl}"')
self.decl = m.group('decl')
self.type = m.group('type')
self.name = m.group('name')
class FuncDecl:
def __init__(self, return_type: str, name: str, args: str) -> None:
self.return_type = return_type.strip()
self.name = name.strip()
self.args = [ParamDecl(arg.strip()) for arg in args.split(',')]
def gen_list(self, format: str) -> str:
return ', '.join(format.format_map(arg.__dict__) for arg in self.args)
def gen_block(self, format: str) -> str:
return '\n'.join(format.format_map(arg.__dict__) for arg in self.args)
# Match wrappers declared with a generated_co_wrapper mark
func_decl_re = re.compile(r'^int\s*generated_co_wrapper\s*'
r'(?P<wrapper_name>[a-z][a-z0-9_]*)'
r'\((?P<args>[^)]*)\);$', re.MULTILINE)
def func_decl_iter(text: str) -> Iterator:
for m in func_decl_re.finditer(text):
yield FuncDecl(return_type='int',
name=m.group('wrapper_name'),
args=m.group('args'))
def snake_to_camel(func_name: str) -> str:
"""
Convert underscore names like 'some_function_name' to camel-case like
'SomeFunctionName'
"""
words = func_name.split('_')
words = [w[0].upper() + w[1:] for w in words]
return ''.join(words)
def gen_wrapper(func: FuncDecl) -> str:
assert func.name.startswith('bdrv_')
assert not func.name.startswith('bdrv_co_')
assert func.return_type == 'int'
assert func.args[0].type in ['BlockDriverState *', 'BdrvChild *']
name = 'bdrv_co_' + func.name[5:]
bs = 'bs' if func.args[0].type == 'BlockDriverState *' else 'child->bs'
struct_name = snake_to_camel(name)
return f"""\
/*
* Wrappers for {name}
*/
typedef struct {struct_name} {{
BdrvPollCo poll_state;
{ func.gen_block(' {decl};') }
}} {struct_name};
static void coroutine_fn {name}_entry(void *opaque)
{{
{struct_name} *s = opaque;
s->poll_state.ret = {name}({ func.gen_list('s->{name}') });
s->poll_state.in_progress = false;
aio_wait_kick();
}}
int {func.name}({ func.gen_list('{decl}') })
{{
if (qemu_in_coroutine()) {{
return {name}({ func.gen_list('{name}') });
}} else {{
{struct_name} s = {{
.poll_state.bs = {bs},
.poll_state.in_progress = true,
{ func.gen_block(' .{name} = {name},') }
}};
s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
return bdrv_poll_co(&s.poll_state);
}}
}}"""
def gen_wrappers(input_code: str) -> str:
res = ''
for func in func_decl_iter(input_code):
res += '\n\n\n'
res += gen_wrapper(func)
return res
if __name__ == '__main__':
if len(sys.argv) < 3:
exit(f'Usage: {sys.argv[0]} OUT_FILE.c IN_FILE.[ch]...')
with open(sys.argv[1], 'w', encoding='utf-8') as f_out:
f_out.write(gen_header())
for fname in sys.argv[2:]:
with open(fname, encoding='utf-8') as f_in:
f_out.write(gen_wrappers(f_in.read()))
f_out.write('\n')

View File

@ -1872,7 +1872,7 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
}
s->io_co = NULL;
ret = bdrv_preadv(bs->backing, offset, qiov);
ret = bdrv_co_preadv(bs->backing, offset, bytes, qiov, 0);
s->has_read = true;
/* Wake up drain_co if it runs */

View File

@ -40,6 +40,11 @@ typedef struct {
uint64_t iova;
} IOVAMapping;
struct IOVARange {
uint64_t start;
uint64_t end;
};
struct QEMUVFIOState {
QemuMutex lock;
@ -49,6 +54,8 @@ struct QEMUVFIOState {
int device;
RAMBlockNotifier ram_notifier;
struct vfio_region_info config_region_info, bar_region_info[6];
struct IOVARange *usable_iova_ranges;
uint8_t nb_iova_ranges;
/* These fields are protected by @lock */
/* VFIO's IO virtual address space is managed by splitting into a few
@ -146,13 +153,13 @@ static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
* Map a PCI bar area.
*/
void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
uint64_t offset, uint64_t size,
uint64_t offset, uint64_t size, int prot,
Error **errp)
{
void *p;
assert_bar_index_valid(s, index);
p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
PROT_READ | PROT_WRITE, MAP_SHARED,
prot, MAP_SHARED,
s->device, s->bar_region_info[index].offset + offset);
if (p == MAP_FAILED) {
error_setg_errno(errp, errno, "Failed to map BAR region");
@ -236,6 +243,35 @@ static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int
return ret == size ? 0 : -errno;
}
static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
{
struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
struct vfio_info_cap_header *cap = (void *)buf + info->cap_offset;
struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
int i;
while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
if (!cap->next) {
return;
}
cap = (struct vfio_info_cap_header *)(buf + cap->next);
}
cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;
s->nb_iova_ranges = cap_iova_range->nr_iovas;
if (s->nb_iova_ranges > 1) {
s->usable_iova_ranges =
g_realloc(s->usable_iova_ranges,
s->nb_iova_ranges * sizeof(struct IOVARange));
}
for (i = 0; i < s->nb_iova_ranges; i++) {
s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
}
}
static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
Error **errp)
{
@ -243,10 +279,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
int i;
uint16_t pci_cmd;
struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
struct vfio_iommu_type1_info *iommu_info = NULL;
size_t iommu_info_size = sizeof(*iommu_info);
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
char *group_file = NULL;
s->usable_iova_ranges = NULL;
/* Create a new container */
s->container = open("/dev/vfio/vfio", O_RDWR);
@ -310,13 +349,35 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
goto fail;
}
iommu_info = g_malloc0(iommu_info_size);
iommu_info->argsz = iommu_info_size;
/* Get additional IOMMU info */
if (ioctl(s->container, VFIO_IOMMU_GET_INFO, &iommu_info)) {
if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
error_setg_errno(errp, errno, "Failed to get IOMMU info");
ret = -errno;
goto fail;
}
/*
* if the kernel does not report usable IOVA regions, choose
* the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
*/
s->nb_iova_ranges = 1;
s->usable_iova_ranges = g_new0(struct IOVARange, 1);
s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;
if (iommu_info->argsz > iommu_info_size) {
iommu_info_size = iommu_info->argsz;
iommu_info = g_realloc(iommu_info, iommu_info_size);
if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
ret = -errno;
goto fail;
}
collect_usable_iova_ranges(s, iommu_info);
}
s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);
if (s->device < 0) {
@ -365,8 +426,13 @@ static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
if (ret) {
goto fail;
}
g_free(iommu_info);
return 0;
fail:
g_free(s->usable_iova_ranges);
s->usable_iova_ranges = NULL;
s->nb_iova_ranges = 0;
g_free(iommu_info);
close(s->group);
fail_container:
close(s->container);
@ -601,6 +667,50 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
return true;
}
static int
qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
{
int i;
for (i = 0; i < s->nb_iova_ranges; i++) {
if (s->usable_iova_ranges[i].end < s->low_water_mark) {
continue;
}
s->low_water_mark =
MAX(s->low_water_mark, s->usable_iova_ranges[i].start);
if (s->usable_iova_ranges[i].end - s->low_water_mark + 1 >= size ||
s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {
*iova = s->low_water_mark;
s->low_water_mark += size;
return 0;
}
}
return -ENOMEM;
}
static int
qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
{
int i;
for (i = s->nb_iova_ranges - 1; i >= 0; i--) {
if (s->usable_iova_ranges[i].start > s->high_water_mark) {
continue;
}
s->high_water_mark =
MIN(s->high_water_mark, s->usable_iova_ranges[i].end + 1);
if (s->high_water_mark - s->usable_iova_ranges[i].start + 1 >= size ||
s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
*iova = s->high_water_mark - size;
s->high_water_mark = *iova;
return 0;
}
}
return -ENOMEM;
}
/* Map [host, host + size) area into a contiguous IOVA address space, and store
* the result in @iova if not NULL. The caller need to make sure the area is
* aligned to page size, and mustn't overlap with existing mapping areas (split
@ -627,7 +737,11 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
goto out;
}
if (!temporary) {
iova0 = s->low_water_mark;
if (qemu_vfio_find_fixed_iova(s, size, &iova0)) {
ret = -ENOMEM;
goto out;
}
mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
if (!mapping) {
ret = -ENOMEM;
@ -639,15 +753,16 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
qemu_vfio_undo_mapping(s, mapping, NULL);
goto out;
}
s->low_water_mark += size;
qemu_vfio_dump_mappings(s);
} else {
iova0 = s->high_water_mark - size;
if (qemu_vfio_find_temp_iova(s, size, &iova0)) {
ret = -ENOMEM;
goto out;
}
ret = qemu_vfio_do_mapping(s, host, size, iova0);
if (ret) {
goto out;
}
s->high_water_mark -= size;
}
}
if (iova) {
@ -716,6 +831,8 @@ void qemu_vfio_close(QEMUVFIOState *s)
qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
}
ram_block_notifier_remove(&s->ram_notifier);
g_free(s->usable_iova_ranges);
s->nb_iova_ranges = 0;
qemu_vfio_reset(s);
close(s->device);
close(s->group);