84d61e5f36
virtio-blk and virtio-scsi invoke virtio_irqfd_notify() to send Used Buffer Notifications from an IOThread. This involves an eventfd write(2) syscall. Calling this repeatedly when completing multiple I/O requests in a row is wasteful. Use the defer_call() API to batch together virtio_irqfd_notify() calls made during thread pool (aio=threads), Linux AIO (aio=native), and io_uring (aio=io_uring) completion processing. Behavior is unchanged for emulated devices that do not use defer_call_begin()/defer_call_end() since defer_call() immediately invokes the callback when called outside a defer_call_begin()/defer_call_end() region. fio rw=randread bs=4k iodepth=64 numjobs=8 IOPS increases by ~9% with a single IOThread and 8 vCPUs. With iodepth=1, IOPS decreases by ~1%, but this could be noise. Detailed performance data and configuration specifics are available here: https://gitlab.com/stefanha/virt-playbooks/-/tree/blk_io_plug-irqfd This duplicates the bottom half (BH) that virtio-blk uses for batching; the next commit will remove that BH. Reviewed-by: Eric Blake <eblake@redhat.com> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Message-ID: <20230913200045.1024233-4-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Reviewed-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
||
---|---|---|
.. | ||
aio-posix.c | ||
aio-posix.h | ||
aio-wait.c | ||
aio-win32.c | ||
aiocb.c | ||
async.c | ||
atomic64.c | ||
base64.c | ||
bitmap.c | ||
bitops.c | ||
block-helpers.c | ||
block-helpers.h | ||
buffer.c | ||
bufferiszero.c | ||
cacheflush.c | ||
compatfd.c | ||
coroutine-sigaltstack.c | ||
coroutine-ucontext.c | ||
coroutine-windows.c | ||
cpuinfo-aarch64.c | ||
cpuinfo-i386.c | ||
cpuinfo-ppc.c | ||
crc32c.c | ||
crc-ccitt.c | ||
cutils.c | ||
dbus.c | ||
defer-call.c | ||
drm.c | ||
envlist.c | ||
error-report.c | ||
error.c | ||
event_notifier-posix.c | ||
event_notifier-win32.c | ||
fdmon-epoll.c | ||
fdmon-io_uring.c | ||
fdmon-poll.c | ||
fifo8.c | ||
filemonitor-inotify.c | ||
filemonitor-stub.c | ||
getauxval.c | ||
guest-random.c | ||
hbitmap.c | ||
hexdump.c | ||
host-utils.c | ||
id.c | ||
int128.c | ||
interval-tree.c | ||
iov.c | ||
iova-tree.c | ||
keyval.c | ||
lockcnt.c | ||
log.c | ||
main-loop.c | ||
memalign.c | ||
memfd.c | ||
meson.build | ||
mmap-alloc.c | ||
module.c | ||
notify.c | ||
nvdimm-utils.c | ||
osdep.c | ||
oslib-posix.c | ||
oslib-win32.c | ||
path.c | ||
qdist.c | ||
qemu-co-shared-resource.c | ||
qemu-co-timeout.c | ||
qemu-config.c | ||
qemu-coroutine-io.c | ||
qemu-coroutine-lock.c | ||
qemu-coroutine-sleep.c | ||
qemu-coroutine.c | ||
qemu-option.c | ||
qemu-print.c | ||
qemu-progress.c | ||
qemu-sockets.c | ||
qemu-thread-common.h | ||
qemu-thread-posix.c | ||
qemu-thread-win32.c | ||
qemu-timer-common.c | ||
qemu-timer.c | ||
qht.c | ||
qsp.c | ||
qtree.c | ||
range.c | ||
rcu.c | ||
readline.c | ||
selfmap.c | ||
stats64.c | ||
sys_membarrier.c | ||
systemd.c | ||
thread-context.c | ||
thread-pool.c | ||
throttle.c | ||
timed-average.c | ||
trace-events | ||
trace.h | ||
transactions.c | ||
unicode.c | ||
uri.c | ||
userfaultfd.c | ||
uuid.c | ||
vfio-helpers.c | ||
vhost-user-server.c | ||
yank.c |