block/file-posix.c: extend to use io_uring

Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
Reviewed-by: Maxim Levitsky <maximlevitsky@gmail.com>
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20200120141858.587874-9-stefanha@redhat.com
Message-Id: <20200120141858.587874-9-stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Aarushi Mehta 2020-01-20 14:18:51 +00:00 committed by Stefan Hajnoczi
parent f80f267373
commit c644751069
1 changed files with 80 additions and 20 deletions

View File

@ -156,6 +156,7 @@ typedef struct BDRVRawState {
bool has_write_zeroes:1;
bool discard_zeroes:1;
bool use_linux_aio:1;
bool use_linux_io_uring:1;
bool page_cache_inconsistent:1;
bool has_fallocate;
bool needs_alignment;
@ -444,7 +445,7 @@ static QemuOptsList raw_runtime_opts = {
{
.name = "aio",
.type = QEMU_OPT_STRING,
.help = "host AIO implementation (threads, native)",
.help = "host AIO implementation (threads, native, io_uring)",
},
{
.name = "locking",
@ -503,9 +504,16 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
goto fail;
}
aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO)
? BLOCKDEV_AIO_OPTIONS_NATIVE
: BLOCKDEV_AIO_OPTIONS_THREADS;
if (bdrv_flags & BDRV_O_NATIVE_AIO) {
aio_default = BLOCKDEV_AIO_OPTIONS_NATIVE;
#ifdef CONFIG_LINUX_IO_URING
} else if (bdrv_flags & BDRV_O_IO_URING) {
aio_default = BLOCKDEV_AIO_OPTIONS_IO_URING;
#endif
} else {
aio_default = BLOCKDEV_AIO_OPTIONS_THREADS;
}
aio = qapi_enum_parse(&BlockdevAioOptions_lookup,
qemu_opt_get(opts, "aio"),
aio_default, &local_err);
@ -514,7 +522,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
ret = -EINVAL;
goto fail;
}
s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
#ifdef CONFIG_LINUX_IO_URING
s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING);
#endif
locking = qapi_enum_parse(&OnOffAuto_lookup,
qemu_opt_get(opts, "locking"),
@ -600,6 +612,22 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
#endif /* !defined(CONFIG_LINUX_AIO) */
#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
error_prepend(errp, "Unable to use io_uring: ");
goto fail;
}
}
#else
if (s->use_linux_io_uring) {
error_setg(errp, "aio=io_uring was specified, but is not supported "
"in this build.");
ret = -EINVAL;
goto fail;
}
#endif /* !defined(CONFIG_LINUX_IO_URING) */
s->has_discard = true;
s->has_write_zeroes = true;
if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
@ -1877,21 +1905,25 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
return -EIO;
/*
* Check if the underlying device requires requests to be aligned,
* and if the request we are trying to submit is aligned or not.
* If this is the case tell the low-level driver that it needs
* to copy the buffer.
* When using O_DIRECT, the request must be aligned to be able to use
* either libaio or io_uring interface. If not fail back to regular thread
* pool read/write code which emulates this for us if we
* set QEMU_AIO_MISALIGNED.
*/
if (s->needs_alignment) {
if (!bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_IO_URING
} else if (s->use_linux_io_uring) {
LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
return luring_co_submit(bs, aio, s->fd, offset, qiov, type);
#endif
#ifdef CONFIG_LINUX_AIO
} else if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
#endif
}
}
acb = (RawPosixAIOData) {
@ -1927,24 +1959,36 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
static void raw_aio_plug(BlockDriverState *bs)
{
BDRVRawState __attribute__((unused)) *s = bs->opaque;
#ifdef CONFIG_LINUX_AIO
BDRVRawState *s = bs->opaque;
if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
laio_io_plug(bs, aio);
}
#endif
#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
luring_io_plug(bs, aio);
}
#endif
}
static void raw_aio_unplug(BlockDriverState *bs)
{
BDRVRawState __attribute__((unused)) *s = bs->opaque;
#ifdef CONFIG_LINUX_AIO
BDRVRawState *s = bs->opaque;
if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
laio_io_unplug(bs, aio);
}
#endif
#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
luring_io_unplug(bs, aio);
}
#endif
}
static int raw_co_flush_to_disk(BlockDriverState *bs)
@ -1964,14 +2008,20 @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
.aio_type = QEMU_AIO_FLUSH,
};
#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH);
}
#endif
return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
}
static void raw_aio_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
BDRVRawState __attribute__((unused)) *s = bs->opaque;
#ifdef CONFIG_LINUX_AIO
BDRVRawState *s = bs->opaque;
if (s->use_linux_aio) {
Error *local_err = NULL;
if (!aio_setup_linux_aio(new_context, &local_err)) {
@ -1981,6 +2031,16 @@ static void raw_aio_attach_aio_context(BlockDriverState *bs,
}
}
#endif
#ifdef CONFIG_LINUX_IO_URING
if (s->use_linux_io_uring) {
Error *local_err;
if (!aio_setup_linux_io_uring(new_context, &local_err)) {
error_reportf_err(local_err, "Unable to use linux io_uring, "
"falling back to thread pool: ");
s->use_linux_io_uring = false;
}
}
#endif
}
static void raw_close(BlockDriverState *bs)