Merge remote-tracking branch 'bonzini/threadpool' into staging
* bonzini/threadpool: (39 commits) raw-win32: implement native asynchronous I/O raw-posix: move linux-aio.c to block/ raw-win32: add emulated AIO support raw-posix: rename raw-posix-aio.h, hide unavailable prototypes raw: merge posix-aio-compat.c into block/raw-posix.c block: switch posix-aio-compat to threadpool threadpool: do not take lock in event_notifier_ready aio: add generic thread-pool facility qemu-thread: add QemuSemaphore linux-aio: use event notifiers aio: clean up now-unused functions main-loop: use aio_notify for qemu_notify_event main-loop: use GSource to poll AIO file descriptors aio: call aio_notify after setting I/O handlers aio: add aio_notify aio: make AioContexts GSources aio: add Win32 implementation aio: prepare for introducing GSource-based dispatch aio: add non-blocking variant of aio_wait aio: test node->deleted before calling io_flush ... Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
commit
5a34dbb559
7
Makefile
7
Makefile
@ -171,8 +171,7 @@ endif
|
||||
qemu-img.o: qemu-img-cmds.h
|
||||
|
||||
tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \
|
||||
qemu-timer-common.o main-loop.o notify.o \
|
||||
iohandler.o cutils.o iov.o async.o error.o
|
||||
main-loop.o iohandler.o error.o
|
||||
tools-obj-$(CONFIG_POSIX) += compatfd.o
|
||||
|
||||
qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y)
|
||||
@ -181,7 +180,7 @@ qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y)
|
||||
|
||||
qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
|
||||
|
||||
vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) $(tools-obj-y) qemu-timer-common.o libcacard/vscclient.o
|
||||
vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) libcacard/vscclient.o
|
||||
$(call quiet-command,$(CC) $(LDFLAGS) -o $@ $^ $(libcacard_libs) $(LIBS)," LINK $@")
|
||||
|
||||
fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/virtio-9p-marshal.o oslib-posix.o $(trace-obj-y)
|
||||
@ -224,7 +223,7 @@ $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
|
||||
QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
|
||||
$(qga-obj-y) qemu-ga.o: $(QGALIB_GEN)
|
||||
|
||||
qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(tools-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y)
|
||||
qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(oslib-obj-y) $(trace-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y)
|
||||
|
||||
QEMULIBS=libuser libdis libdis-user
|
||||
|
||||
|
@ -19,7 +19,7 @@ universal-obj-y += $(qom-obj-y)
|
||||
|
||||
#######################################################################
|
||||
# oslib-obj-y is code depending on the OS (win32 vs posix)
|
||||
oslib-obj-y = osdep.o
|
||||
oslib-obj-y = osdep.o cutils.o qemu-timer-common.o
|
||||
oslib-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o
|
||||
oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o
|
||||
|
||||
@ -41,12 +41,12 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
|
||||
#######################################################################
|
||||
# block-obj-y is code used by both qemu system emulation and qemu-img
|
||||
|
||||
block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
|
||||
block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o
|
||||
block-obj-y += qemu-progress.o qemu-sockets.o uri.o notify.o
|
||||
block-obj-y = iov.o cache-utils.o qemu-option.o module.o async.o
|
||||
block-obj-y += nbd.o block.o blockjob.o aes.o qemu-config.o
|
||||
block-obj-y += thread-pool.o qemu-progress.o qemu-sockets.o uri.o notify.o
|
||||
block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
|
||||
block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
|
||||
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
|
||||
block-obj-$(CONFIG_POSIX) += event_notifier-posix.o aio-posix.o
|
||||
block-obj-$(CONFIG_WIN32) += event_notifier-win32.o aio-win32.o
|
||||
block-obj-y += block/
|
||||
block-obj-y += $(qapi-obj-y) qapi-types.o qapi-visit.o
|
||||
|
||||
@ -92,9 +92,8 @@ common-obj-y += ui/
|
||||
common-obj-y += bt-host.o bt-vhci.o
|
||||
|
||||
common-obj-y += dma-helpers.o
|
||||
common-obj-y += iov.o acl.o
|
||||
common-obj-y += acl.o
|
||||
common-obj-$(CONFIG_POSIX) += compatfd.o
|
||||
common-obj-y += event_notifier.o
|
||||
common-obj-y += qemu-timer.o qemu-timer-common.o
|
||||
common-obj-y += qtest.o
|
||||
common-obj-y += vl.o
|
||||
@ -113,7 +112,7 @@ endif
|
||||
user-obj-y =
|
||||
user-obj-y += envlist.o path.o
|
||||
user-obj-y += tcg-runtime.o host-utils.o
|
||||
user-obj-y += cutils.o iov.o cache-utils.o
|
||||
user-obj-y += cache-utils.o
|
||||
user-obj-y += module.o
|
||||
user-obj-y += qemu-user.o
|
||||
user-obj-y += $(trace-obj-y)
|
||||
@ -228,9 +227,8 @@ universal-obj-y += $(qapi-obj-y)
|
||||
######################################################################
|
||||
# guest agent
|
||||
|
||||
qga-obj-y = qga/ qemu-ga.o module.o
|
||||
qga-obj-$(CONFIG_WIN32) += oslib-win32.o
|
||||
qga-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-sockets.o qemu-option.o
|
||||
qga-obj-y = qga/ qemu-ga.o module.o qemu-tool.o
|
||||
qga-obj-$(CONFIG_POSIX) += qemu-sockets.o qemu-option.o
|
||||
|
||||
vl.o: QEMU_CFLAGS+=$(GPROF_CFLAGS)
|
||||
|
||||
|
268
aio-posix.c
Normal file
268
aio-posix.c
Normal file
@ -0,0 +1,268 @@
|
||||
/*
|
||||
* QEMU aio implementation
|
||||
*
|
||||
* Copyright IBM, Corp. 2008
|
||||
*
|
||||
* Authors:
|
||||
* Anthony Liguori <aliguori@us.ibm.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*
|
||||
* Contributions after 2012-01-13 are licensed under the terms of the
|
||||
* GNU GPL, version 2 or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include "qemu-common.h"
|
||||
#include "block.h"
|
||||
#include "qemu-queue.h"
|
||||
#include "qemu_socket.h"
|
||||
|
||||
struct AioHandler
|
||||
{
|
||||
GPollFD pfd;
|
||||
IOHandler *io_read;
|
||||
IOHandler *io_write;
|
||||
AioFlushHandler *io_flush;
|
||||
int deleted;
|
||||
void *opaque;
|
||||
QLIST_ENTRY(AioHandler) node;
|
||||
};
|
||||
|
||||
static AioHandler *find_aio_handler(AioContext *ctx, int fd)
|
||||
{
|
||||
AioHandler *node;
|
||||
|
||||
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
|
||||
if (node->pfd.fd == fd)
|
||||
if (!node->deleted)
|
||||
return node;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void aio_set_fd_handler(AioContext *ctx,
|
||||
int fd,
|
||||
IOHandler *io_read,
|
||||
IOHandler *io_write,
|
||||
AioFlushHandler *io_flush,
|
||||
void *opaque)
|
||||
{
|
||||
AioHandler *node;
|
||||
|
||||
node = find_aio_handler(ctx, fd);
|
||||
|
||||
/* Are we deleting the fd handler? */
|
||||
if (!io_read && !io_write) {
|
||||
if (node) {
|
||||
g_source_remove_poll(&ctx->source, &node->pfd);
|
||||
|
||||
/* If the lock is held, just mark the node as deleted */
|
||||
if (ctx->walking_handlers) {
|
||||
node->deleted = 1;
|
||||
node->pfd.revents = 0;
|
||||
} else {
|
||||
/* Otherwise, delete it for real. We can't just mark it as
|
||||
* deleted because deleted nodes are only cleaned up after
|
||||
* releasing the walking_handlers lock.
|
||||
*/
|
||||
QLIST_REMOVE(node, node);
|
||||
g_free(node);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (node == NULL) {
|
||||
/* Alloc and insert if it's not already there */
|
||||
node = g_malloc0(sizeof(AioHandler));
|
||||
node->pfd.fd = fd;
|
||||
QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
|
||||
|
||||
g_source_add_poll(&ctx->source, &node->pfd);
|
||||
}
|
||||
/* Update handler with latest information */
|
||||
node->io_read = io_read;
|
||||
node->io_write = io_write;
|
||||
node->io_flush = io_flush;
|
||||
node->opaque = opaque;
|
||||
|
||||
node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP : 0);
|
||||
node->pfd.events |= (io_write ? G_IO_OUT : 0);
|
||||
}
|
||||
|
||||
aio_notify(ctx);
|
||||
}
|
||||
|
||||
void aio_set_event_notifier(AioContext *ctx,
|
||||
EventNotifier *notifier,
|
||||
EventNotifierHandler *io_read,
|
||||
AioFlushEventNotifierHandler *io_flush)
|
||||
{
|
||||
aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
|
||||
(IOHandler *)io_read, NULL,
|
||||
(AioFlushHandler *)io_flush, notifier);
|
||||
}
|
||||
|
||||
bool aio_pending(AioContext *ctx)
|
||||
{
|
||||
AioHandler *node;
|
||||
|
||||
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
|
||||
int revents;
|
||||
|
||||
/*
|
||||
* FIXME: right now we cannot get G_IO_HUP and G_IO_ERR because
|
||||
* main-loop.c is still select based (due to the slirp legacy).
|
||||
* If main-loop.c ever switches to poll, G_IO_ERR should be
|
||||
* tested too. Dispatching G_IO_ERR to both handlers should be
|
||||
* okay, since handlers need to be ready for spurious wakeups.
|
||||
*/
|
||||
revents = node->pfd.revents & node->pfd.events;
|
||||
if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
|
||||
return true;
|
||||
}
|
||||
if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool aio_poll(AioContext *ctx, bool blocking)
|
||||
{
|
||||
static struct timeval tv0;
|
||||
AioHandler *node;
|
||||
fd_set rdfds, wrfds;
|
||||
int max_fd = -1;
|
||||
int ret;
|
||||
bool busy, progress;
|
||||
|
||||
progress = false;
|
||||
|
||||
/*
|
||||
* If there are callbacks left that have been queued, we need to call then.
|
||||
* Do not call select in this case, because it is possible that the caller
|
||||
* does not need a complete flush (as is the case for qemu_aio_wait loops).
|
||||
*/
|
||||
if (aio_bh_poll(ctx)) {
|
||||
blocking = false;
|
||||
progress = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Then dispatch any pending callbacks from the GSource.
|
||||
*
|
||||
* We have to walk very carefully in case qemu_aio_set_fd_handler is
|
||||
* called while we're walking.
|
||||
*/
|
||||
node = QLIST_FIRST(&ctx->aio_handlers);
|
||||
while (node) {
|
||||
AioHandler *tmp;
|
||||
int revents;
|
||||
|
||||
ctx->walking_handlers++;
|
||||
|
||||
revents = node->pfd.revents & node->pfd.events;
|
||||
node->pfd.revents = 0;
|
||||
|
||||
/* See comment in aio_pending. */
|
||||
if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
|
||||
node->io_read(node->opaque);
|
||||
progress = true;
|
||||
}
|
||||
if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
|
||||
node->io_write(node->opaque);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
tmp = node;
|
||||
node = QLIST_NEXT(node, node);
|
||||
|
||||
ctx->walking_handlers--;
|
||||
|
||||
if (!ctx->walking_handlers && tmp->deleted) {
|
||||
QLIST_REMOVE(tmp, node);
|
||||
g_free(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
if (progress && !blocking) {
|
||||
return true;
|
||||
}
|
||||
|
||||
ctx->walking_handlers++;
|
||||
|
||||
FD_ZERO(&rdfds);
|
||||
FD_ZERO(&wrfds);
|
||||
|
||||
/* fill fd sets */
|
||||
busy = false;
|
||||
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
|
||||
/* If there aren't pending AIO operations, don't invoke callbacks.
|
||||
* Otherwise, if there are no AIO requests, qemu_aio_wait() would
|
||||
* wait indefinitely.
|
||||
*/
|
||||
if (!node->deleted && node->io_flush) {
|
||||
if (node->io_flush(node->opaque) == 0) {
|
||||
continue;
|
||||
}
|
||||
busy = true;
|
||||
}
|
||||
if (!node->deleted && node->io_read) {
|
||||
FD_SET(node->pfd.fd, &rdfds);
|
||||
max_fd = MAX(max_fd, node->pfd.fd + 1);
|
||||
}
|
||||
if (!node->deleted && node->io_write) {
|
||||
FD_SET(node->pfd.fd, &wrfds);
|
||||
max_fd = MAX(max_fd, node->pfd.fd + 1);
|
||||
}
|
||||
}
|
||||
|
||||
ctx->walking_handlers--;
|
||||
|
||||
/* No AIO operations? Get us out of here */
|
||||
if (!busy) {
|
||||
return progress;
|
||||
}
|
||||
|
||||
/* wait until next event */
|
||||
ret = select(max_fd, &rdfds, &wrfds, NULL, blocking ? NULL : &tv0);
|
||||
|
||||
/* if we have any readable fds, dispatch event */
|
||||
if (ret > 0) {
|
||||
/* we have to walk very carefully in case
|
||||
* qemu_aio_set_fd_handler is called while we're walking */
|
||||
node = QLIST_FIRST(&ctx->aio_handlers);
|
||||
while (node) {
|
||||
AioHandler *tmp;
|
||||
|
||||
ctx->walking_handlers++;
|
||||
|
||||
if (!node->deleted &&
|
||||
FD_ISSET(node->pfd.fd, &rdfds) &&
|
||||
node->io_read) {
|
||||
node->io_read(node->opaque);
|
||||
progress = true;
|
||||
}
|
||||
if (!node->deleted &&
|
||||
FD_ISSET(node->pfd.fd, &wrfds) &&
|
||||
node->io_write) {
|
||||
node->io_write(node->opaque);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
tmp = node;
|
||||
node = QLIST_NEXT(node, node);
|
||||
|
||||
ctx->walking_handlers--;
|
||||
|
||||
if (!ctx->walking_handlers && tmp->deleted) {
|
||||
QLIST_REMOVE(tmp, node);
|
||||
g_free(tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
215
aio-win32.c
Normal file
215
aio-win32.c
Normal file
@ -0,0 +1,215 @@
|
||||
/*
|
||||
* QEMU aio implementation
|
||||
*
|
||||
* Copyright IBM Corp., 2008
|
||||
* Copyright Red Hat Inc., 2012
|
||||
*
|
||||
* Authors:
|
||||
* Anthony Liguori <aliguori@us.ibm.com>
|
||||
* Paolo Bonzini <pbonzini@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*
|
||||
* Contributions after 2012-01-13 are licensed under the terms of the
|
||||
* GNU GPL, version 2 or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include "qemu-common.h"
|
||||
#include "block.h"
|
||||
#include "qemu-queue.h"
|
||||
#include "qemu_socket.h"
|
||||
|
||||
struct AioHandler {
|
||||
EventNotifier *e;
|
||||
EventNotifierHandler *io_notify;
|
||||
AioFlushEventNotifierHandler *io_flush;
|
||||
GPollFD pfd;
|
||||
int deleted;
|
||||
QLIST_ENTRY(AioHandler) node;
|
||||
};
|
||||
|
||||
void aio_set_event_notifier(AioContext *ctx,
|
||||
EventNotifier *e,
|
||||
EventNotifierHandler *io_notify,
|
||||
AioFlushEventNotifierHandler *io_flush)
|
||||
{
|
||||
AioHandler *node;
|
||||
|
||||
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
|
||||
if (node->e == e && !node->deleted) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Are we deleting the fd handler? */
|
||||
if (!io_notify) {
|
||||
if (node) {
|
||||
g_source_remove_poll(&ctx->source, &node->pfd);
|
||||
|
||||
/* If the lock is held, just mark the node as deleted */
|
||||
if (ctx->walking_handlers) {
|
||||
node->deleted = 1;
|
||||
node->pfd.revents = 0;
|
||||
} else {
|
||||
/* Otherwise, delete it for real. We can't just mark it as
|
||||
* deleted because deleted nodes are only cleaned up after
|
||||
* releasing the walking_handlers lock.
|
||||
*/
|
||||
QLIST_REMOVE(node, node);
|
||||
g_free(node);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (node == NULL) {
|
||||
/* Alloc and insert if it's not already there */
|
||||
node = g_malloc0(sizeof(AioHandler));
|
||||
node->e = e;
|
||||
node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
|
||||
node->pfd.events = G_IO_IN;
|
||||
QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
|
||||
|
||||
g_source_add_poll(&ctx->source, &node->pfd);
|
||||
}
|
||||
/* Update handler with latest information */
|
||||
node->io_notify = io_notify;
|
||||
node->io_flush = io_flush;
|
||||
}
|
||||
|
||||
aio_notify(ctx);
|
||||
}
|
||||
|
||||
bool aio_pending(AioContext *ctx)
|
||||
{
|
||||
AioHandler *node;
|
||||
|
||||
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
|
||||
if (node->pfd.revents && node->io_notify) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool aio_poll(AioContext *ctx, bool blocking)
|
||||
{
|
||||
AioHandler *node;
|
||||
HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
|
||||
bool busy, progress;
|
||||
int count;
|
||||
|
||||
progress = false;
|
||||
|
||||
/*
|
||||
* If there are callbacks left that have been queued, we need to call then.
|
||||
* Do not call select in this case, because it is possible that the caller
|
||||
* does not need a complete flush (as is the case for qemu_aio_wait loops).
|
||||
*/
|
||||
if (aio_bh_poll(ctx)) {
|
||||
blocking = false;
|
||||
progress = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Then dispatch any pending callbacks from the GSource.
|
||||
*
|
||||
* We have to walk very carefully in case qemu_aio_set_fd_handler is
|
||||
* called while we're walking.
|
||||
*/
|
||||
node = QLIST_FIRST(&ctx->aio_handlers);
|
||||
while (node) {
|
||||
AioHandler *tmp;
|
||||
|
||||
ctx->walking_handlers++;
|
||||
|
||||
if (node->pfd.revents && node->io_notify) {
|
||||
node->pfd.revents = 0;
|
||||
node->io_notify(node->e);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
tmp = node;
|
||||
node = QLIST_NEXT(node, node);
|
||||
|
||||
ctx->walking_handlers--;
|
||||
|
||||
if (!ctx->walking_handlers && tmp->deleted) {
|
||||
QLIST_REMOVE(tmp, node);
|
||||
g_free(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
if (progress && !blocking) {
|
||||
return true;
|
||||
}
|
||||
|
||||
ctx->walking_handlers++;
|
||||
|
||||
/* fill fd sets */
|
||||
busy = false;
|
||||
count = 0;
|
||||
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
|
||||
/* If there aren't pending AIO operations, don't invoke callbacks.
|
||||
* Otherwise, if there are no AIO requests, qemu_aio_wait() would
|
||||
* wait indefinitely.
|
||||
*/
|
||||
if (!node->deleted && node->io_flush) {
|
||||
if (node->io_flush(node->e) == 0) {
|
||||
continue;
|
||||
}
|
||||
busy = true;
|
||||
}
|
||||
if (!node->deleted && node->io_notify) {
|
||||
events[count++] = event_notifier_get_handle(node->e);
|
||||
}
|
||||
}
|
||||
|
||||
ctx->walking_handlers--;
|
||||
|
||||
/* No AIO operations? Get us out of here */
|
||||
if (!busy) {
|
||||
return progress;
|
||||
}
|
||||
|
||||
/* wait until next event */
|
||||
for (;;) {
|
||||
int timeout = blocking ? INFINITE : 0;
|
||||
int ret = WaitForMultipleObjects(count, events, FALSE, timeout);
|
||||
|
||||
/* if we have any signaled events, dispatch event */
|
||||
if ((DWORD) (ret - WAIT_OBJECT_0) >= count) {
|
||||
break;
|
||||
}
|
||||
|
||||
blocking = false;
|
||||
|
||||
/* we have to walk very carefully in case
|
||||
* qemu_aio_set_fd_handler is called while we're walking */
|
||||
node = QLIST_FIRST(&ctx->aio_handlers);
|
||||
while (node) {
|
||||
AioHandler *tmp;
|
||||
|
||||
ctx->walking_handlers++;
|
||||
|
||||
if (!node->deleted &&
|
||||
event_notifier_get_handle(node->e) == events[ret - WAIT_OBJECT_0] &&
|
||||
node->io_notify) {
|
||||
node->io_notify(node->e);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
tmp = node;
|
||||
node = QLIST_NEXT(node, node);
|
||||
|
||||
ctx->walking_handlers--;
|
||||
|
||||
if (!ctx->walking_handlers && tmp->deleted) {
|
||||
QLIST_REMOVE(tmp, node);
|
||||
g_free(tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
194
aio.c
194
aio.c
@ -1,194 +0,0 @@
|
||||
/*
|
||||
* QEMU aio implementation
|
||||
*
|
||||
* Copyright IBM, Corp. 2008
|
||||
*
|
||||
* Authors:
|
||||
* Anthony Liguori <aliguori@us.ibm.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*
|
||||
* Contributions after 2012-01-13 are licensed under the terms of the
|
||||
* GNU GPL, version 2 or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include "qemu-common.h"
|
||||
#include "block.h"
|
||||
#include "qemu-queue.h"
|
||||
#include "qemu_socket.h"
|
||||
|
||||
typedef struct AioHandler AioHandler;
|
||||
|
||||
/* The list of registered AIO handlers */
|
||||
static QLIST_HEAD(, AioHandler) aio_handlers;
|
||||
|
||||
/* This is a simple lock used to protect the aio_handlers list. Specifically,
|
||||
* it's used to ensure that no callbacks are removed while we're walking and
|
||||
* dispatching callbacks.
|
||||
*/
|
||||
static int walking_handlers;
|
||||
|
||||
struct AioHandler
|
||||
{
|
||||
int fd;
|
||||
IOHandler *io_read;
|
||||
IOHandler *io_write;
|
||||
AioFlushHandler *io_flush;
|
||||
int deleted;
|
||||
void *opaque;
|
||||
QLIST_ENTRY(AioHandler) node;
|
||||
};
|
||||
|
||||
static AioHandler *find_aio_handler(int fd)
|
||||
{
|
||||
AioHandler *node;
|
||||
|
||||
QLIST_FOREACH(node, &aio_handlers, node) {
|
||||
if (node->fd == fd)
|
||||
if (!node->deleted)
|
||||
return node;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int qemu_aio_set_fd_handler(int fd,
|
||||
IOHandler *io_read,
|
||||
IOHandler *io_write,
|
||||
AioFlushHandler *io_flush,
|
||||
void *opaque)
|
||||
{
|
||||
AioHandler *node;
|
||||
|
||||
node = find_aio_handler(fd);
|
||||
|
||||
/* Are we deleting the fd handler? */
|
||||
if (!io_read && !io_write) {
|
||||
if (node) {
|
||||
/* If the lock is held, just mark the node as deleted */
|
||||
if (walking_handlers)
|
||||
node->deleted = 1;
|
||||
else {
|
||||
/* Otherwise, delete it for real. We can't just mark it as
|
||||
* deleted because deleted nodes are only cleaned up after
|
||||
* releasing the walking_handlers lock.
|
||||
*/
|
||||
QLIST_REMOVE(node, node);
|
||||
g_free(node);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (node == NULL) {
|
||||
/* Alloc and insert if it's not already there */
|
||||
node = g_malloc0(sizeof(AioHandler));
|
||||
node->fd = fd;
|
||||
QLIST_INSERT_HEAD(&aio_handlers, node, node);
|
||||
}
|
||||
/* Update handler with latest information */
|
||||
node->io_read = io_read;
|
||||
node->io_write = io_write;
|
||||
node->io_flush = io_flush;
|
||||
node->opaque = opaque;
|
||||
}
|
||||
|
||||
qemu_set_fd_handler2(fd, NULL, io_read, io_write, opaque);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void qemu_aio_flush(void)
|
||||
{
|
||||
while (qemu_aio_wait());
|
||||
}
|
||||
|
||||
bool qemu_aio_wait(void)
|
||||
{
|
||||
AioHandler *node;
|
||||
fd_set rdfds, wrfds;
|
||||
int max_fd = -1;
|
||||
int ret;
|
||||
bool busy;
|
||||
|
||||
/*
|
||||
* If there are callbacks left that have been queued, we need to call then.
|
||||
* Do not call select in this case, because it is possible that the caller
|
||||
* does not need a complete flush (as is the case for qemu_aio_wait loops).
|
||||
*/
|
||||
if (qemu_bh_poll()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
walking_handlers++;
|
||||
|
||||
FD_ZERO(&rdfds);
|
||||
FD_ZERO(&wrfds);
|
||||
|
||||
/* fill fd sets */
|
||||
busy = false;
|
||||
QLIST_FOREACH(node, &aio_handlers, node) {
|
||||
/* If there aren't pending AIO operations, don't invoke callbacks.
|
||||
* Otherwise, if there are no AIO requests, qemu_aio_wait() would
|
||||
* wait indefinitely.
|
||||
*/
|
||||
if (node->io_flush) {
|
||||
if (node->io_flush(node->opaque) == 0) {
|
||||
continue;
|
||||
}
|
||||
busy = true;
|
||||
}
|
||||
if (!node->deleted && node->io_read) {
|
||||
FD_SET(node->fd, &rdfds);
|
||||
max_fd = MAX(max_fd, node->fd + 1);
|
||||
}
|
||||
if (!node->deleted && node->io_write) {
|
||||
FD_SET(node->fd, &wrfds);
|
||||
max_fd = MAX(max_fd, node->fd + 1);
|
||||
}
|
||||
}
|
||||
|
||||
walking_handlers--;
|
||||
|
||||
/* No AIO operations? Get us out of here */
|
||||
if (!busy) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* wait until next event */
|
||||
ret = select(max_fd, &rdfds, &wrfds, NULL, NULL);
|
||||
|
||||
/* if we have any readable fds, dispatch event */
|
||||
if (ret > 0) {
|
||||
/* we have to walk very carefully in case
|
||||
* qemu_aio_set_fd_handler is called while we're walking */
|
||||
node = QLIST_FIRST(&aio_handlers);
|
||||
while (node) {
|
||||
AioHandler *tmp;
|
||||
|
||||
walking_handlers++;
|
||||
|
||||
if (!node->deleted &&
|
||||
FD_ISSET(node->fd, &rdfds) &&
|
||||
node->io_read) {
|
||||
node->io_read(node->opaque);
|
||||
}
|
||||
if (!node->deleted &&
|
||||
FD_ISSET(node->fd, &wrfds) &&
|
||||
node->io_write) {
|
||||
node->io_write(node->opaque);
|
||||
}
|
||||
|
||||
tmp = node;
|
||||
node = QLIST_NEXT(node, node);
|
||||
|
||||
walking_handlers--;
|
||||
|
||||
if (!walking_handlers && tmp->deleted) {
|
||||
QLIST_REMOVE(tmp, node);
|
||||
g_free(tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
@ -34,6 +34,6 @@ int tcg_available(void);
|
||||
int kvm_available(void);
|
||||
int xen_available(void);
|
||||
|
||||
CpuDefinitionInfoList GCC_WEAK_DECL *arch_query_cpu_definitions(Error **errp);
|
||||
CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp);
|
||||
|
||||
#endif
|
||||
|
118
async.c
118
async.c
@ -26,13 +26,11 @@
|
||||
#include "qemu-aio.h"
|
||||
#include "main-loop.h"
|
||||
|
||||
/* Anchor of the list of Bottom Halves belonging to the context */
|
||||
static struct QEMUBH *first_bh;
|
||||
|
||||
/***********************************************************/
|
||||
/* bottom halves (can be seen as timers which expire ASAP) */
|
||||
|
||||
struct QEMUBH {
|
||||
AioContext *ctx;
|
||||
QEMUBHFunc *cb;
|
||||
void *opaque;
|
||||
QEMUBH *next;
|
||||
@ -41,27 +39,27 @@ struct QEMUBH {
|
||||
bool deleted;
|
||||
};
|
||||
|
||||
QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
|
||||
QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
|
||||
{
|
||||
QEMUBH *bh;
|
||||
bh = g_malloc0(sizeof(QEMUBH));
|
||||
bh->ctx = ctx;
|
||||
bh->cb = cb;
|
||||
bh->opaque = opaque;
|
||||
bh->next = first_bh;
|
||||
first_bh = bh;
|
||||
bh->next = ctx->first_bh;
|
||||
ctx->first_bh = bh;
|
||||
return bh;
|
||||
}
|
||||
|
||||
int qemu_bh_poll(void)
|
||||
int aio_bh_poll(AioContext *ctx)
|
||||
{
|
||||
QEMUBH *bh, **bhp, *next;
|
||||
int ret;
|
||||
static int nesting = 0;
|
||||
|
||||
nesting++;
|
||||
ctx->walking_bh++;
|
||||
|
||||
ret = 0;
|
||||
for (bh = first_bh; bh; bh = next) {
|
||||
for (bh = ctx->first_bh; bh; bh = next) {
|
||||
next = bh->next;
|
||||
if (!bh->deleted && bh->scheduled) {
|
||||
bh->scheduled = 0;
|
||||
@ -72,11 +70,11 @@ int qemu_bh_poll(void)
|
||||
}
|
||||
}
|
||||
|
||||
nesting--;
|
||||
ctx->walking_bh--;
|
||||
|
||||
/* remove deleted bhs */
|
||||
if (!nesting) {
|
||||
bhp = &first_bh;
|
||||
if (!ctx->walking_bh) {
|
||||
bhp = &ctx->first_bh;
|
||||
while (*bhp) {
|
||||
bh = *bhp;
|
||||
if (bh->deleted) {
|
||||
@ -105,8 +103,7 @@ void qemu_bh_schedule(QEMUBH *bh)
|
||||
return;
|
||||
bh->scheduled = 1;
|
||||
bh->idle = 0;
|
||||
/* stop the currently executing CPU to execute the BH ASAP */
|
||||
qemu_notify_event();
|
||||
aio_notify(bh->ctx);
|
||||
}
|
||||
|
||||
void qemu_bh_cancel(QEMUBH *bh)
|
||||
@ -120,16 +117,20 @@ void qemu_bh_delete(QEMUBH *bh)
|
||||
bh->deleted = 1;
|
||||
}
|
||||
|
||||
void qemu_bh_update_timeout(uint32_t *timeout)
|
||||
static gboolean
|
||||
aio_ctx_prepare(GSource *source, gint *timeout)
|
||||
{
|
||||
AioContext *ctx = (AioContext *) source;
|
||||
QEMUBH *bh;
|
||||
bool scheduled = false;
|
||||
|
||||
for (bh = first_bh; bh; bh = bh->next) {
|
||||
for (bh = ctx->first_bh; bh; bh = bh->next) {
|
||||
if (!bh->deleted && bh->scheduled) {
|
||||
scheduled = true;
|
||||
if (bh->idle) {
|
||||
/* idle bottom halves will be polled at least
|
||||
* every 10ms */
|
||||
*timeout = MIN(10, *timeout);
|
||||
*timeout = 10;
|
||||
} else {
|
||||
/* non-idle bottom halves will be executed
|
||||
* immediately */
|
||||
@ -138,5 +139,86 @@ void qemu_bh_update_timeout(uint32_t *timeout)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return scheduled;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
aio_ctx_check(GSource *source)
|
||||
{
|
||||
AioContext *ctx = (AioContext *) source;
|
||||
QEMUBH *bh;
|
||||
|
||||
for (bh = ctx->first_bh; bh; bh = bh->next) {
|
||||
if (!bh->deleted && bh->scheduled) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return aio_pending(ctx);
|
||||
}
|
||||
|
||||
static gboolean
|
||||
aio_ctx_dispatch(GSource *source,
|
||||
GSourceFunc callback,
|
||||
gpointer user_data)
|
||||
{
|
||||
AioContext *ctx = (AioContext *) source;
|
||||
|
||||
assert(callback == NULL);
|
||||
aio_poll(ctx, false);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
aio_ctx_finalize(GSource *source)
|
||||
{
|
||||
AioContext *ctx = (AioContext *) source;
|
||||
|
||||
aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
|
||||
event_notifier_cleanup(&ctx->notifier);
|
||||
}
|
||||
|
||||
static GSourceFuncs aio_source_funcs = {
|
||||
aio_ctx_prepare,
|
||||
aio_ctx_check,
|
||||
aio_ctx_dispatch,
|
||||
aio_ctx_finalize
|
||||
};
|
||||
|
||||
GSource *aio_get_g_source(AioContext *ctx)
|
||||
{
|
||||
g_source_ref(&ctx->source);
|
||||
return &ctx->source;
|
||||
}
|
||||
|
||||
void aio_notify(AioContext *ctx)
|
||||
{
|
||||
event_notifier_set(&ctx->notifier);
|
||||
}
|
||||
|
||||
AioContext *aio_context_new(void)
|
||||
{
|
||||
AioContext *ctx;
|
||||
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
|
||||
event_notifier_init(&ctx->notifier, false);
|
||||
aio_set_event_notifier(ctx, &ctx->notifier,
|
||||
(EventNotifierHandler *)
|
||||
event_notifier_test_and_clear, NULL);
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
void aio_context_ref(AioContext *ctx)
|
||||
{
|
||||
g_source_ref(&ctx->source);
|
||||
}
|
||||
|
||||
void aio_context_unref(AioContext *ctx)
|
||||
{
|
||||
g_source_unref(&ctx->source);
|
||||
}
|
||||
|
||||
void aio_flush(AioContext *ctx)
|
||||
{
|
||||
while (aio_poll(ctx, true));
|
||||
}
|
||||
|
@ -2,13 +2,18 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat
|
||||
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
|
||||
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
|
||||
block-obj-y += qed-check.o
|
||||
block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
|
||||
block-obj-$(CONFIG_WIN32) += raw-win32.o
|
||||
block-obj-y += parallels.o blkdebug.o blkverify.o
|
||||
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
|
||||
block-obj-$(CONFIG_POSIX) += raw-posix.o
|
||||
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
|
||||
|
||||
ifeq ($(CONFIG_POSIX),y)
|
||||
block-obj-y += nbd.o sheepdog.o
|
||||
block-obj-$(CONFIG_LIBISCSI) += iscsi.o
|
||||
block-obj-$(CONFIG_CURL) += curl.o
|
||||
block-obj-$(CONFIG_RBD) += rbd.o
|
||||
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
|
||||
endif
|
||||
|
||||
common-obj-y += stream.o
|
||||
common-obj-y += commit.o
|
||||
|
@ -9,9 +9,10 @@
|
||||
*/
|
||||
#include "qemu-common.h"
|
||||
#include "qemu-aio.h"
|
||||
#include "block/raw-posix-aio.h"
|
||||
#include "qemu-queue.h"
|
||||
#include "block/raw-aio.h"
|
||||
#include "event_notifier.h"
|
||||
|
||||
#include <sys/eventfd.h>
|
||||
#include <libaio.h>
|
||||
|
||||
/*
|
||||
@ -37,7 +38,7 @@ struct qemu_laiocb {
|
||||
|
||||
struct qemu_laio_state {
|
||||
io_context_t ctx;
|
||||
int efd;
|
||||
EventNotifier e;
|
||||
int count;
|
||||
};
|
||||
|
||||
@ -76,29 +77,17 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
|
||||
qemu_aio_release(laiocb);
|
||||
}
|
||||
|
||||
static void qemu_laio_completion_cb(void *opaque)
|
||||
static void qemu_laio_completion_cb(EventNotifier *e)
|
||||
{
|
||||
struct qemu_laio_state *s = opaque;
|
||||
struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
|
||||
|
||||
while (1) {
|
||||
while (event_notifier_test_and_clear(&s->e)) {
|
||||
struct io_event events[MAX_EVENTS];
|
||||
uint64_t val;
|
||||
ssize_t ret;
|
||||
struct timespec ts = { 0 };
|
||||
int nevents, i;
|
||||
|
||||
do {
|
||||
ret = read(s->efd, &val, sizeof(val));
|
||||
} while (ret == -1 && errno == EINTR);
|
||||
|
||||
if (ret == -1 && errno == EAGAIN)
|
||||
break;
|
||||
|
||||
if (ret != 8)
|
||||
break;
|
||||
|
||||
do {
|
||||
nevents = io_getevents(s->ctx, val, MAX_EVENTS, events, &ts);
|
||||
nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
|
||||
} while (nevents == -EINTR);
|
||||
|
||||
for (i = 0; i < nevents; i++) {
|
||||
@ -112,9 +101,9 @@ static void qemu_laio_completion_cb(void *opaque)
|
||||
}
|
||||
}
|
||||
|
||||
static int qemu_laio_flush_cb(void *opaque)
|
||||
static int qemu_laio_flush_cb(EventNotifier *e)
|
||||
{
|
||||
struct qemu_laio_state *s = opaque;
|
||||
struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
|
||||
|
||||
return (s->count > 0) ? 1 : 0;
|
||||
}
|
||||
@ -146,8 +135,9 @@ static void laio_cancel(BlockDriverAIOCB *blockacb)
|
||||
* We might be able to do this slightly more optimal by removing the
|
||||
* O_NONBLOCK flag.
|
||||
*/
|
||||
while (laiocb->ret == -EINPROGRESS)
|
||||
qemu_laio_completion_cb(laiocb->ctx);
|
||||
while (laiocb->ret == -EINPROGRESS) {
|
||||
qemu_laio_completion_cb(&laiocb->ctx->e);
|
||||
}
|
||||
}
|
||||
|
||||
static AIOPool laio_pool = {
|
||||
@ -186,7 +176,7 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
|
||||
__func__, type);
|
||||
goto out_free_aiocb;
|
||||
}
|
||||
io_set_eventfd(&laiocb->iocb, s->efd);
|
||||
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
|
||||
s->count++;
|
||||
|
||||
if (io_submit(s->ctx, 1, &iocbs) < 0)
|
||||
@ -205,21 +195,21 @@ void *laio_init(void)
|
||||
struct qemu_laio_state *s;
|
||||
|
||||
s = g_malloc0(sizeof(*s));
|
||||
s->efd = eventfd(0, 0);
|
||||
if (s->efd == -1)
|
||||
if (event_notifier_init(&s->e, false) < 0) {
|
||||
goto out_free_state;
|
||||
fcntl(s->efd, F_SETFL, O_NONBLOCK);
|
||||
}
|
||||
|
||||
if (io_setup(MAX_EVENTS, &s->ctx) != 0)
|
||||
if (io_setup(MAX_EVENTS, &s->ctx) != 0) {
|
||||
goto out_close_efd;
|
||||
}
|
||||
|
||||
qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL,
|
||||
qemu_laio_flush_cb, s);
|
||||
qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb,
|
||||
qemu_laio_flush_cb);
|
||||
|
||||
return s;
|
||||
|
||||
out_close_efd:
|
||||
close(s->efd);
|
||||
event_notifier_cleanup(&s->e);
|
||||
out_free_state:
|
||||
g_free(s);
|
||||
return NULL;
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* QEMU Posix block I/O backend AIO support
|
||||
* Declarations for AIO in the raw protocol
|
||||
*
|
||||
* Copyright IBM, Corp. 2008
|
||||
*
|
||||
@ -12,8 +12,8 @@
|
||||
* Contributions after 2012-01-13 are licensed under the terms of the
|
||||
* GNU GPL, version 2 or (at your option) any later version.
|
||||
*/
|
||||
#ifndef QEMU_RAW_POSIX_AIO_H
|
||||
#define QEMU_RAW_POSIX_AIO_H
|
||||
#ifndef QEMU_RAW_AIO_H
|
||||
#define QEMU_RAW_AIO_H
|
||||
|
||||
/* AIO request types */
|
||||
#define QEMU_AIO_READ 0x0001
|
||||
@ -27,19 +27,22 @@
|
||||
#define QEMU_AIO_MISALIGNED 0x1000
|
||||
|
||||
|
||||
/* posix-aio-compat.c - thread pool based implementation */
|
||||
int paio_init(void);
|
||||
BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque, int type);
|
||||
BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
|
||||
unsigned long int req, void *buf,
|
||||
BlockDriverCompletionFunc *cb, void *opaque);
|
||||
|
||||
/* linux-aio.c - Linux native implementation */
|
||||
#ifdef CONFIG_LINUX_AIO
|
||||
void *laio_init(void);
|
||||
BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque, int type);
|
||||
#endif
|
||||
|
||||
#endif /* QEMU_RAW_POSIX_AIO_H */
|
||||
#ifdef _WIN32
|
||||
typedef struct QEMUWin32AIOState QEMUWin32AIOState;
|
||||
QEMUWin32AIOState *win32_aio_init(void);
|
||||
int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
|
||||
BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
|
||||
QEMUWin32AIOState *aio, HANDLE hfile,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque, int type);
|
||||
#endif
|
||||
|
||||
#endif /* QEMU_RAW_AIO_H */
|
@ -27,7 +27,10 @@
|
||||
#include "qemu-log.h"
|
||||
#include "block_int.h"
|
||||
#include "module.h"
|
||||
#include "block/raw-posix-aio.h"
|
||||
#include "trace.h"
|
||||
#include "thread-pool.h"
|
||||
#include "iov.h"
|
||||
#include "raw-aio.h"
|
||||
|
||||
#if defined(__APPLE__) && (__MACH__)
|
||||
#include <paths.h>
|
||||
@ -149,6 +152,20 @@ typedef struct BDRVRawReopenState {
|
||||
static int fd_open(BlockDriverState *bs);
|
||||
static int64_t raw_getlength(BlockDriverState *bs);
|
||||
|
||||
typedef struct RawPosixAIOData {
|
||||
BlockDriverState *bs;
|
||||
int aio_fildes;
|
||||
union {
|
||||
struct iovec *aio_iov;
|
||||
void *aio_ioctl_buf;
|
||||
};
|
||||
int aio_niov;
|
||||
size_t aio_nbytes;
|
||||
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
|
||||
off_t aio_offset;
|
||||
int aio_type;
|
||||
} RawPosixAIOData;
|
||||
|
||||
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
|
||||
static int cdrom_reopen(BlockDriverState *bs);
|
||||
#endif
|
||||
@ -266,14 +283,10 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
|
||||
}
|
||||
s->fd = fd;
|
||||
|
||||
/* We're falling back to POSIX AIO in some cases so init always */
|
||||
if (paio_init() < 0) {
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_LINUX_AIO
|
||||
if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
|
||||
goto out_close;
|
||||
qemu_close(fd);
|
||||
return -errno;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -284,10 +297,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
||||
out_close:
|
||||
qemu_close(fd);
|
||||
return -errno;
|
||||
}
|
||||
|
||||
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
|
||||
@ -434,6 +443,283 @@ static int qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
|
||||
if (ret == -1) {
|
||||
return -errno;
|
||||
}
|
||||
|
||||
/*
|
||||
* This looks weird, but the aio code only considers a request
|
||||
* successful if it has written the full number of bytes.
|
||||
*
|
||||
* Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
|
||||
* so in fact we return the ioctl command here to make posix_aio_read()
|
||||
* happy..
|
||||
*/
|
||||
return aiocb->aio_nbytes;
|
||||
}
|
||||
|
||||
static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = qemu_fdatasync(aiocb->aio_fildes);
|
||||
if (ret == -1) {
|
||||
return -errno;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PREADV
|
||||
|
||||
static bool preadv_present = true;
|
||||
|
||||
static ssize_t
|
||||
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
|
||||
{
|
||||
return preadv(fd, iov, nr_iov, offset);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
|
||||
{
|
||||
return pwritev(fd, iov, nr_iov, offset);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static bool preadv_present = false;
|
||||
|
||||
static ssize_t
|
||||
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
|
||||
{
|
||||
ssize_t len;
|
||||
|
||||
do {
|
||||
if (aiocb->aio_type & QEMU_AIO_WRITE)
|
||||
len = qemu_pwritev(aiocb->aio_fildes,
|
||||
aiocb->aio_iov,
|
||||
aiocb->aio_niov,
|
||||
aiocb->aio_offset);
|
||||
else
|
||||
len = qemu_preadv(aiocb->aio_fildes,
|
||||
aiocb->aio_iov,
|
||||
aiocb->aio_niov,
|
||||
aiocb->aio_offset);
|
||||
} while (len == -1 && errno == EINTR);
|
||||
|
||||
if (len == -1) {
|
||||
return -errno;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read/writes the data to/from a given linear buffer.
|
||||
*
|
||||
* Returns the number of bytes handles or -errno in case of an error. Short
|
||||
* reads are only returned if the end of the file is reached.
|
||||
*/
|
||||
static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
|
||||
{
|
||||
ssize_t offset = 0;
|
||||
ssize_t len;
|
||||
|
||||
while (offset < aiocb->aio_nbytes) {
|
||||
if (aiocb->aio_type & QEMU_AIO_WRITE) {
|
||||
len = pwrite(aiocb->aio_fildes,
|
||||
(const char *)buf + offset,
|
||||
aiocb->aio_nbytes - offset,
|
||||
aiocb->aio_offset + offset);
|
||||
} else {
|
||||
len = pread(aiocb->aio_fildes,
|
||||
buf + offset,
|
||||
aiocb->aio_nbytes - offset,
|
||||
aiocb->aio_offset + offset);
|
||||
}
|
||||
if (len == -1 && errno == EINTR) {
|
||||
continue;
|
||||
} else if (len == -1) {
|
||||
offset = -errno;
|
||||
break;
|
||||
} else if (len == 0) {
|
||||
break;
|
||||
}
|
||||
offset += len;
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
|
||||
{
|
||||
ssize_t nbytes;
|
||||
char *buf;
|
||||
|
||||
if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
|
||||
/*
|
||||
* If there is just a single buffer, and it is properly aligned
|
||||
* we can just use plain pread/pwrite without any problems.
|
||||
*/
|
||||
if (aiocb->aio_niov == 1) {
|
||||
return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
|
||||
}
|
||||
/*
|
||||
* We have more than one iovec, and all are properly aligned.
|
||||
*
|
||||
* Try preadv/pwritev first and fall back to linearizing the
|
||||
* buffer if it's not supported.
|
||||
*/
|
||||
if (preadv_present) {
|
||||
nbytes = handle_aiocb_rw_vector(aiocb);
|
||||
if (nbytes == aiocb->aio_nbytes ||
|
||||
(nbytes < 0 && nbytes != -ENOSYS)) {
|
||||
return nbytes;
|
||||
}
|
||||
preadv_present = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX(hch): short read/write. no easy way to handle the reminder
|
||||
* using these interfaces. For now retry using plain
|
||||
* pread/pwrite?
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Ok, we have to do it the hard way, copy all segments into
|
||||
* a single aligned buffer.
|
||||
*/
|
||||
buf = qemu_blockalign(aiocb->bs, aiocb->aio_nbytes);
|
||||
if (aiocb->aio_type & QEMU_AIO_WRITE) {
|
||||
char *p = buf;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < aiocb->aio_niov; ++i) {
|
||||
memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
|
||||
p += aiocb->aio_iov[i].iov_len;
|
||||
}
|
||||
}
|
||||
|
||||
nbytes = handle_aiocb_rw_linear(aiocb, buf);
|
||||
if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
|
||||
char *p = buf;
|
||||
size_t count = aiocb->aio_nbytes, copy;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < aiocb->aio_niov && count; ++i) {
|
||||
copy = count;
|
||||
if (copy > aiocb->aio_iov[i].iov_len) {
|
||||
copy = aiocb->aio_iov[i].iov_len;
|
||||
}
|
||||
memcpy(aiocb->aio_iov[i].iov_base, p, copy);
|
||||
p += copy;
|
||||
count -= copy;
|
||||
}
|
||||
}
|
||||
qemu_vfree(buf);
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int aio_worker(void *arg)
|
||||
{
|
||||
RawPosixAIOData *aiocb = arg;
|
||||
ssize_t ret = 0;
|
||||
|
||||
switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
|
||||
case QEMU_AIO_READ:
|
||||
ret = handle_aiocb_rw(aiocb);
|
||||
if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
|
||||
iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
|
||||
0, aiocb->aio_nbytes - ret);
|
||||
|
||||
ret = aiocb->aio_nbytes;
|
||||
}
|
||||
if (ret == aiocb->aio_nbytes) {
|
||||
ret = 0;
|
||||
} else if (ret >= 0 && ret < aiocb->aio_nbytes) {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
break;
|
||||
case QEMU_AIO_WRITE:
|
||||
ret = handle_aiocb_rw(aiocb);
|
||||
if (ret == aiocb->aio_nbytes) {
|
||||
ret = 0;
|
||||
} else if (ret >= 0 && ret < aiocb->aio_nbytes) {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
break;
|
||||
case QEMU_AIO_FLUSH:
|
||||
ret = handle_aiocb_flush(aiocb);
|
||||
break;
|
||||
case QEMU_AIO_IOCTL:
|
||||
ret = handle_aiocb_ioctl(aiocb);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
g_slice_free(RawPosixAIOData, aiocb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque, int type)
|
||||
{
|
||||
RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
|
||||
|
||||
acb->bs = bs;
|
||||
acb->aio_type = type;
|
||||
acb->aio_fildes = fd;
|
||||
|
||||
if (qiov) {
|
||||
acb->aio_iov = qiov->iov;
|
||||
acb->aio_niov = qiov->niov;
|
||||
}
|
||||
acb->aio_nbytes = nb_sectors * 512;
|
||||
acb->aio_offset = sector_num * 512;
|
||||
|
||||
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
|
||||
return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
|
||||
}
|
||||
|
||||
static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
|
||||
unsigned long int req, void *buf,
|
||||
BlockDriverCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
|
||||
|
||||
acb->bs = bs;
|
||||
acb->aio_type = QEMU_AIO_IOCTL;
|
||||
acb->aio_fildes = fd;
|
||||
acb->aio_offset = 0;
|
||||
acb->aio_ioctl_buf = buf;
|
||||
acb->aio_ioctl_cmd = req;
|
||||
|
||||
return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
|
||||
}
|
||||
|
||||
static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque, int type)
|
||||
|
@ -25,6 +25,10 @@
|
||||
#include "qemu-timer.h"
|
||||
#include "block_int.h"
|
||||
#include "module.h"
|
||||
#include "raw-aio.h"
|
||||
#include "trace.h"
|
||||
#include "thread-pool.h"
|
||||
#include "iov.h"
|
||||
#include <windows.h>
|
||||
#include <winioctl.h>
|
||||
|
||||
@ -32,12 +36,130 @@
|
||||
#define FTYPE_CD 1
|
||||
#define FTYPE_HARDDISK 2
|
||||
|
||||
static QEMUWin32AIOState *aio;
|
||||
|
||||
typedef struct RawWin32AIOData {
|
||||
BlockDriverState *bs;
|
||||
HANDLE hfile;
|
||||
struct iovec *aio_iov;
|
||||
int aio_niov;
|
||||
size_t aio_nbytes;
|
||||
off64_t aio_offset;
|
||||
int aio_type;
|
||||
} RawWin32AIOData;
|
||||
|
||||
typedef struct BDRVRawState {
|
||||
HANDLE hfile;
|
||||
int type;
|
||||
char drive_path[16]; /* format: "d:\" */
|
||||
QEMUWin32AIOState *aio;
|
||||
} BDRVRawState;
|
||||
|
||||
/*
|
||||
* Read/writes the data to/from a given linear buffer.
|
||||
*
|
||||
* Returns the number of bytes handles or -errno in case of an error. Short
|
||||
* reads are only returned if the end of the file is reached.
|
||||
*/
|
||||
static size_t handle_aiocb_rw(RawWin32AIOData *aiocb)
|
||||
{
|
||||
size_t offset = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < aiocb->aio_niov; i++) {
|
||||
OVERLAPPED ov;
|
||||
DWORD ret, ret_count, len;
|
||||
|
||||
memset(&ov, 0, sizeof(ov));
|
||||
ov.Offset = (aiocb->aio_offset + offset);
|
||||
ov.OffsetHigh = (aiocb->aio_offset + offset) >> 32;
|
||||
len = aiocb->aio_iov[i].iov_len;
|
||||
if (aiocb->aio_type & QEMU_AIO_WRITE) {
|
||||
ret = WriteFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
|
||||
len, &ret_count, &ov);
|
||||
} else {
|
||||
ret = ReadFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
|
||||
len, &ret_count, &ov);
|
||||
}
|
||||
if (!ret) {
|
||||
ret_count = 0;
|
||||
}
|
||||
if (ret_count != len) {
|
||||
break;
|
||||
}
|
||||
offset += len;
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static int aio_worker(void *arg)
|
||||
{
|
||||
RawWin32AIOData *aiocb = arg;
|
||||
ssize_t ret = 0;
|
||||
size_t count;
|
||||
|
||||
switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
|
||||
case QEMU_AIO_READ:
|
||||
count = handle_aiocb_rw(aiocb);
|
||||
if (count < aiocb->aio_nbytes && aiocb->bs->growable) {
|
||||
/* A short read means that we have reached EOF. Pad the buffer
|
||||
* with zeros for bytes after EOF. */
|
||||
iov_memset(aiocb->aio_iov, aiocb->aio_niov, count,
|
||||
0, aiocb->aio_nbytes - count);
|
||||
|
||||
count = aiocb->aio_nbytes;
|
||||
}
|
||||
if (count == aiocb->aio_nbytes) {
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
break;
|
||||
case QEMU_AIO_WRITE:
|
||||
count = handle_aiocb_rw(aiocb);
|
||||
if (count == aiocb->aio_nbytes) {
|
||||
count = 0;
|
||||
} else {
|
||||
count = -EINVAL;
|
||||
}
|
||||
break;
|
||||
case QEMU_AIO_FLUSH:
|
||||
if (!FlushFileBuffers(aiocb->hfile)) {
|
||||
return -EIO;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
g_slice_free(RawWin32AIOData, aiocb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque, int type)
|
||||
{
|
||||
RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
|
||||
|
||||
acb->bs = bs;
|
||||
acb->hfile = hfile;
|
||||
acb->aio_type = type;
|
||||
|
||||
if (qiov) {
|
||||
acb->aio_iov = qiov->iov;
|
||||
acb->aio_niov = qiov->niov;
|
||||
}
|
||||
acb->aio_nbytes = nb_sectors * 512;
|
||||
acb->aio_offset = sector_num * 512;
|
||||
|
||||
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
|
||||
return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
|
||||
}
|
||||
|
||||
int qemu_ftruncate64(int fd, int64_t length)
|
||||
{
|
||||
LARGE_INTEGER li;
|
||||
@ -89,6 +211,9 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
|
||||
}
|
||||
|
||||
*overlapped = FILE_ATTRIBUTE_NORMAL;
|
||||
if (flags & BDRV_O_NATIVE_AIO) {
|
||||
*overlapped |= FILE_FLAG_OVERLAPPED;
|
||||
}
|
||||
if (flags & BDRV_O_NOCACHE) {
|
||||
*overlapped |= FILE_FLAG_NO_BUFFERING;
|
||||
}
|
||||
@ -103,6 +228,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
|
||||
s->type = FTYPE_FILE;
|
||||
|
||||
raw_parse_flags(flags, &access_flags, &overlapped);
|
||||
|
||||
if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
|
||||
aio = win32_aio_init();
|
||||
if (aio == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
s->hfile = CreateFile(filename, access_flags,
|
||||
FILE_SHARE_READ, NULL,
|
||||
@ -112,64 +244,53 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
|
||||
|
||||
if (err == ERROR_ACCESS_DENIED)
|
||||
return -EACCES;
|
||||
return -1;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (flags & BDRV_O_NATIVE_AIO) {
|
||||
int ret = win32_aio_attach(aio, s->hfile);
|
||||
if (ret < 0) {
|
||||
CloseHandle(s->hfile);
|
||||
return ret;
|
||||
}
|
||||
s->aio = aio;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int raw_read(BlockDriverState *bs, int64_t sector_num,
|
||||
uint8_t *buf, int nb_sectors)
|
||||
static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
OVERLAPPED ov;
|
||||
DWORD ret_count;
|
||||
int ret;
|
||||
int64_t offset = sector_num * 512;
|
||||
int count = nb_sectors * 512;
|
||||
|
||||
memset(&ov, 0, sizeof(ov));
|
||||
ov.Offset = offset;
|
||||
ov.OffsetHigh = offset >> 32;
|
||||
ret = ReadFile(s->hfile, buf, count, &ret_count, &ov);
|
||||
if (!ret)
|
||||
return ret_count;
|
||||
if (ret_count == count)
|
||||
ret_count = 0;
|
||||
return ret_count;
|
||||
}
|
||||
|
||||
static int raw_write(BlockDriverState *bs, int64_t sector_num,
|
||||
const uint8_t *buf, int nb_sectors)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
OVERLAPPED ov;
|
||||
DWORD ret_count;
|
||||
int ret;
|
||||
int64_t offset = sector_num * 512;
|
||||
int count = nb_sectors * 512;
|
||||
|
||||
memset(&ov, 0, sizeof(ov));
|
||||
ov.Offset = offset;
|
||||
ov.OffsetHigh = offset >> 32;
|
||||
ret = WriteFile(s->hfile, buf, count, &ret_count, &ov);
|
||||
if (!ret)
|
||||
return ret_count;
|
||||
if (ret_count == count)
|
||||
ret_count = 0;
|
||||
return ret_count;
|
||||
}
|
||||
|
||||
static int raw_flush(BlockDriverState *bs)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
int ret;
|
||||
|
||||
ret = FlushFileBuffers(s->hfile);
|
||||
if (ret == 0) {
|
||||
return -EIO;
|
||||
if (s->aio) {
|
||||
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
|
||||
nb_sectors, cb, opaque, QEMU_AIO_READ);
|
||||
} else {
|
||||
return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
|
||||
cb, opaque, QEMU_AIO_READ);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
if (s->aio) {
|
||||
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
|
||||
nb_sectors, cb, opaque, QEMU_AIO_WRITE);
|
||||
} else {
|
||||
return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
|
||||
cb, opaque, QEMU_AIO_WRITE);
|
||||
}
|
||||
}
|
||||
|
||||
static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
|
||||
BlockDriverCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
BDRVRawState *s = bs->opaque;
|
||||
return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
|
||||
}
|
||||
|
||||
static void raw_close(BlockDriverState *bs)
|
||||
@ -290,9 +411,9 @@ static BlockDriver bdrv_file = {
|
||||
.bdrv_close = raw_close,
|
||||
.bdrv_create = raw_create,
|
||||
|
||||
.bdrv_read = raw_read,
|
||||
.bdrv_write = raw_write,
|
||||
.bdrv_co_flush_to_disk = raw_flush,
|
||||
.bdrv_aio_readv = raw_aio_readv,
|
||||
.bdrv_aio_writev = raw_aio_writev,
|
||||
.bdrv_aio_flush = raw_aio_flush,
|
||||
|
||||
.bdrv_truncate = raw_truncate,
|
||||
.bdrv_getlength = raw_getlength,
|
||||
@ -413,9 +534,9 @@ static BlockDriver bdrv_host_device = {
|
||||
.bdrv_close = raw_close,
|
||||
.bdrv_has_zero_init = hdev_has_zero_init,
|
||||
|
||||
.bdrv_read = raw_read,
|
||||
.bdrv_write = raw_write,
|
||||
.bdrv_co_flush_to_disk = raw_flush,
|
||||
.bdrv_aio_readv = raw_aio_readv,
|
||||
.bdrv_aio_writev = raw_aio_writev,
|
||||
.bdrv_aio_flush = raw_aio_flush,
|
||||
|
||||
.bdrv_getlength = raw_getlength,
|
||||
.bdrv_get_allocated_file_size
|
||||
|
226
block/win32-aio.c
Normal file
226
block/win32-aio.c
Normal file
@ -0,0 +1,226 @@
|
||||
/*
|
||||
* Block driver for RAW files (win32)
|
||||
*
|
||||
* Copyright (c) 2006 Fabrice Bellard
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
#include "qemu-common.h"
|
||||
#include "qemu-timer.h"
|
||||
#include "block_int.h"
|
||||
#include "module.h"
|
||||
#include "qemu-common.h"
|
||||
#include "qemu-aio.h"
|
||||
#include "raw-aio.h"
|
||||
#include "event_notifier.h"
|
||||
#include <windows.h>
|
||||
#include <winioctl.h>
|
||||
|
||||
#define FTYPE_FILE 0
|
||||
#define FTYPE_CD 1
|
||||
#define FTYPE_HARDDISK 2
|
||||
|
||||
struct QEMUWin32AIOState {
|
||||
HANDLE hIOCP;
|
||||
EventNotifier e;
|
||||
int count;
|
||||
};
|
||||
|
||||
typedef struct QEMUWin32AIOCB {
|
||||
BlockDriverAIOCB common;
|
||||
struct QEMUWin32AIOState *ctx;
|
||||
int nbytes;
|
||||
OVERLAPPED ov;
|
||||
QEMUIOVector *qiov;
|
||||
void *buf;
|
||||
bool is_read;
|
||||
bool is_linear;
|
||||
} QEMUWin32AIOCB;
|
||||
|
||||
/*
|
||||
* Completes an AIO request (calls the callback and frees the ACB).
|
||||
*/
|
||||
static void win32_aio_process_completion(QEMUWin32AIOState *s,
|
||||
QEMUWin32AIOCB *waiocb, DWORD count)
|
||||
{
|
||||
int ret;
|
||||
s->count--;
|
||||
|
||||
if (waiocb->ov.Internal != 0) {
|
||||
ret = -EIO;
|
||||
} else {
|
||||
ret = 0;
|
||||
if (count < waiocb->nbytes) {
|
||||
/* Short reads mean EOF, pad with zeros. */
|
||||
if (waiocb->is_read) {
|
||||
qemu_iovec_memset(waiocb->qiov, count, 0,
|
||||
waiocb->qiov->size - count);
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!waiocb->is_linear) {
|
||||
if (ret == 0 && waiocb->is_read) {
|
||||
QEMUIOVector *qiov = waiocb->qiov;
|
||||
char *p = waiocb->buf;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < qiov->niov; ++i) {
|
||||
memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
|
||||
p += qiov->iov[i].iov_len;
|
||||
}
|
||||
g_free(waiocb->buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
waiocb->common.cb(waiocb->common.opaque, ret);
|
||||
qemu_aio_release(waiocb);
|
||||
}
|
||||
|
||||
static void win32_aio_completion_cb(EventNotifier *e)
|
||||
{
|
||||
QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
|
||||
DWORD count;
|
||||
ULONG_PTR key;
|
||||
OVERLAPPED *ov;
|
||||
|
||||
event_notifier_test_and_clear(&s->e);
|
||||
while (GetQueuedCompletionStatus(s->hIOCP, &count, &key, &ov, 0)) {
|
||||
QEMUWin32AIOCB *waiocb = container_of(ov, QEMUWin32AIOCB, ov);
|
||||
|
||||
win32_aio_process_completion(s, waiocb, count);
|
||||
}
|
||||
}
|
||||
|
||||
static int win32_aio_flush_cb(EventNotifier *e)
|
||||
{
|
||||
QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
|
||||
|
||||
return (s->count > 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
|
||||
{
|
||||
QEMUWin32AIOCB *waiocb = (QEMUWin32AIOCB *)blockacb;
|
||||
|
||||
/*
|
||||
* CancelIoEx is only supported in Vista and newer. For now, just
|
||||
* wait for completion.
|
||||
*/
|
||||
while (!HasOverlappedIoCompleted(&waiocb->ov)) {
|
||||
qemu_aio_wait();
|
||||
}
|
||||
}
|
||||
|
||||
static AIOPool win32_aio_pool = {
|
||||
.aiocb_size = sizeof(QEMUWin32AIOCB),
|
||||
.cancel = win32_aio_cancel,
|
||||
};
|
||||
|
||||
BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
|
||||
QEMUWin32AIOState *aio, HANDLE hfile,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque, int type)
|
||||
{
|
||||
struct QEMUWin32AIOCB *waiocb;
|
||||
uint64_t offset = sector_num * 512;
|
||||
DWORD rc;
|
||||
|
||||
waiocb = qemu_aio_get(&win32_aio_pool, bs, cb, opaque);
|
||||
waiocb->nbytes = nb_sectors * 512;
|
||||
waiocb->qiov = qiov;
|
||||
waiocb->is_read = (type == QEMU_AIO_READ);
|
||||
|
||||
if (qiov->niov > 1) {
|
||||
waiocb->buf = qemu_blockalign(bs, qiov->size);
|
||||
if (type & QEMU_AIO_WRITE) {
|
||||
char *p = waiocb->buf;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < qiov->niov; ++i) {
|
||||
memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
|
||||
p += qiov->iov[i].iov_len;
|
||||
}
|
||||
}
|
||||
waiocb->is_linear = false;
|
||||
} else {
|
||||
waiocb->buf = qiov->iov[0].iov_base;
|
||||
waiocb->is_linear = true;
|
||||
}
|
||||
|
||||
waiocb->ov = (OVERLAPPED) {
|
||||
.Offset = (DWORD) offset,
|
||||
.OffsetHigh = (DWORD) (offset >> 32),
|
||||
.hEvent = event_notifier_get_handle(&aio->e)
|
||||
};
|
||||
aio->count++;
|
||||
|
||||
if (type & QEMU_AIO_READ) {
|
||||
rc = ReadFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov);
|
||||
} else {
|
||||
rc = WriteFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov);
|
||||
}
|
||||
if(rc == 0 && GetLastError() != ERROR_IO_PENDING) {
|
||||
goto out_dec_count;
|
||||
}
|
||||
return &waiocb->common;
|
||||
|
||||
out_dec_count:
|
||||
aio->count--;
|
||||
qemu_aio_release(waiocb);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
|
||||
{
|
||||
if (CreateIoCompletionPort(hfile, aio->hIOCP, (ULONG_PTR) 0, 0) == NULL) {
|
||||
return -EINVAL;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
QEMUWin32AIOState *win32_aio_init(void)
|
||||
{
|
||||
QEMUWin32AIOState *s;
|
||||
|
||||
s = g_malloc0(sizeof(*s));
|
||||
if (event_notifier_init(&s->e, false) < 0) {
|
||||
goto out_free_state;
|
||||
}
|
||||
|
||||
s->hIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
|
||||
if (s->hIOCP == NULL) {
|
||||
goto out_close_efd;
|
||||
}
|
||||
|
||||
qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb,
|
||||
win32_aio_flush_cb);
|
||||
|
||||
return s;
|
||||
|
||||
out_close_efd:
|
||||
event_notifier_cleanup(&s->e);
|
||||
out_free_state:
|
||||
g_free(s);
|
||||
return NULL;
|
||||
}
|
11
compiler.h
11
compiler.h
@ -50,16 +50,13 @@
|
||||
# define __printf__ __gnu_printf__
|
||||
# endif
|
||||
# endif
|
||||
#if defined(_WIN32)
|
||||
#define GCC_WEAK __attribute__((weak))
|
||||
#define GCC_WEAK_DECL GCC_WEAK
|
||||
#else
|
||||
#define GCC_WEAK __attribute__((weak))
|
||||
#define GCC_WEAK_DECL
|
||||
#endif
|
||||
# define QEMU_WEAK_ALIAS(newname, oldname) \
|
||||
typeof(oldname) newname __attribute__((weak, alias (#oldname)))
|
||||
#else
|
||||
#define GCC_ATTR /**/
|
||||
#define GCC_FMT_ATTR(n, m)
|
||||
#define QEMU_WEAK_ALIAS(newname, oldname) \
|
||||
_Pragma("weak " #newname "=" #oldname)
|
||||
#endif
|
||||
|
||||
#endif /* COMPILER_H */
|
||||
|
108
cutils.c
108
cutils.c
@ -142,109 +142,6 @@ int qemu_fdatasync(int fd)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* io vectors */
|
||||
|
||||
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
|
||||
{
|
||||
qiov->iov = g_malloc(alloc_hint * sizeof(struct iovec));
|
||||
qiov->niov = 0;
|
||||
qiov->nalloc = alloc_hint;
|
||||
qiov->size = 0;
|
||||
}
|
||||
|
||||
void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
|
||||
{
|
||||
int i;
|
||||
|
||||
qiov->iov = iov;
|
||||
qiov->niov = niov;
|
||||
qiov->nalloc = -1;
|
||||
qiov->size = 0;
|
||||
for (i = 0; i < niov; i++)
|
||||
qiov->size += iov[i].iov_len;
|
||||
}
|
||||
|
||||
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
|
||||
{
|
||||
assert(qiov->nalloc != -1);
|
||||
|
||||
if (qiov->niov == qiov->nalloc) {
|
||||
qiov->nalloc = 2 * qiov->nalloc + 1;
|
||||
qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec));
|
||||
}
|
||||
qiov->iov[qiov->niov].iov_base = base;
|
||||
qiov->iov[qiov->niov].iov_len = len;
|
||||
qiov->size += len;
|
||||
++qiov->niov;
|
||||
}
|
||||
|
||||
/*
|
||||
* Concatenates (partial) iovecs from src to the end of dst.
|
||||
* It starts copying after skipping `soffset' bytes at the
|
||||
* beginning of src and adds individual vectors from src to
|
||||
* dst copies up to `sbytes' bytes total, or up to the end
|
||||
* of src if it comes first. This way, it is okay to specify
|
||||
* very large value for `sbytes' to indicate "up to the end
|
||||
* of src".
|
||||
* Only vector pointers are processed, not the actual data buffers.
|
||||
*/
|
||||
void qemu_iovec_concat(QEMUIOVector *dst,
|
||||
QEMUIOVector *src, size_t soffset, size_t sbytes)
|
||||
{
|
||||
int i;
|
||||
size_t done;
|
||||
struct iovec *siov = src->iov;
|
||||
assert(dst->nalloc != -1);
|
||||
assert(src->size >= soffset);
|
||||
for (i = 0, done = 0; done < sbytes && i < src->niov; i++) {
|
||||
if (soffset < siov[i].iov_len) {
|
||||
size_t len = MIN(siov[i].iov_len - soffset, sbytes - done);
|
||||
qemu_iovec_add(dst, siov[i].iov_base + soffset, len);
|
||||
done += len;
|
||||
soffset = 0;
|
||||
} else {
|
||||
soffset -= siov[i].iov_len;
|
||||
}
|
||||
}
|
||||
/* return done; */
|
||||
}
|
||||
|
||||
void qemu_iovec_destroy(QEMUIOVector *qiov)
|
||||
{
|
||||
assert(qiov->nalloc != -1);
|
||||
|
||||
qemu_iovec_reset(qiov);
|
||||
g_free(qiov->iov);
|
||||
qiov->nalloc = 0;
|
||||
qiov->iov = NULL;
|
||||
}
|
||||
|
||||
void qemu_iovec_reset(QEMUIOVector *qiov)
|
||||
{
|
||||
assert(qiov->nalloc != -1);
|
||||
|
||||
qiov->niov = 0;
|
||||
qiov->size = 0;
|
||||
}
|
||||
|
||||
size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
|
||||
void *buf, size_t bytes)
|
||||
{
|
||||
return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
|
||||
}
|
||||
|
||||
size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
|
||||
const void *buf, size_t bytes)
|
||||
{
|
||||
return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
|
||||
}
|
||||
|
||||
size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
|
||||
int fillc, size_t bytes)
|
||||
{
|
||||
return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks if a buffer is all zeroes
|
||||
*
|
||||
@ -383,11 +280,6 @@ int qemu_parse_fd(const char *param)
|
||||
return fd;
|
||||
}
|
||||
|
||||
int qemu_parse_fdset(const char *param)
|
||||
{
|
||||
return qemu_parse_fd(param);
|
||||
}
|
||||
|
||||
/* round down to the nearest power of 2*/
|
||||
int64_t pow2floor(int64_t value)
|
||||
{
|
||||
|
120
event_notifier-posix.c
Normal file
120
event_notifier-posix.c
Normal file
@ -0,0 +1,120 @@
|
||||
/*
|
||||
* event notifier support
|
||||
*
|
||||
* Copyright Red Hat, Inc. 2010
|
||||
*
|
||||
* Authors:
|
||||
* Michael S. Tsirkin <mst@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu-common.h"
|
||||
#include "event_notifier.h"
|
||||
#include "qemu-char.h"
|
||||
|
||||
#ifdef CONFIG_EVENTFD
|
||||
#include <sys/eventfd.h>
|
||||
#endif
|
||||
|
||||
void event_notifier_init_fd(EventNotifier *e, int fd)
|
||||
{
|
||||
e->rfd = fd;
|
||||
e->wfd = fd;
|
||||
}
|
||||
|
||||
int event_notifier_init(EventNotifier *e, int active)
|
||||
{
|
||||
int fds[2];
|
||||
int ret;
|
||||
|
||||
#ifdef CONFIG_EVENTFD
|
||||
ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
|
||||
#else
|
||||
ret = -1;
|
||||
errno = ENOSYS;
|
||||
#endif
|
||||
if (ret >= 0) {
|
||||
e->rfd = e->wfd = ret;
|
||||
} else {
|
||||
if (errno != ENOSYS) {
|
||||
return -errno;
|
||||
}
|
||||
if (qemu_pipe(fds) < 0) {
|
||||
return -errno;
|
||||
}
|
||||
ret = fcntl_setfl(fds[0], O_NONBLOCK);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
goto fail;
|
||||
}
|
||||
ret = fcntl_setfl(fds[1], O_NONBLOCK);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
goto fail;
|
||||
}
|
||||
e->rfd = fds[0];
|
||||
e->wfd = fds[1];
|
||||
}
|
||||
if (active) {
|
||||
event_notifier_set(e);
|
||||
}
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
close(fds[0]);
|
||||
close(fds[1]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void event_notifier_cleanup(EventNotifier *e)
|
||||
{
|
||||
if (e->rfd != e->wfd) {
|
||||
close(e->rfd);
|
||||
}
|
||||
close(e->wfd);
|
||||
}
|
||||
|
||||
int event_notifier_get_fd(EventNotifier *e)
|
||||
{
|
||||
return e->rfd;
|
||||
}
|
||||
|
||||
int event_notifier_set_handler(EventNotifier *e,
|
||||
EventNotifierHandler *handler)
|
||||
{
|
||||
return qemu_set_fd_handler(e->rfd, (IOHandler *)handler, NULL, e);
|
||||
}
|
||||
|
||||
int event_notifier_set(EventNotifier *e)
|
||||
{
|
||||
static const uint64_t value = 1;
|
||||
ssize_t ret;
|
||||
|
||||
do {
|
||||
ret = write(e->wfd, &value, sizeof(value));
|
||||
} while (ret < 0 && errno == EINTR);
|
||||
|
||||
/* EAGAIN is fine, a read must be pending. */
|
||||
if (ret < 0 && errno != EAGAIN) {
|
||||
return -errno;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int event_notifier_test_and_clear(EventNotifier *e)
|
||||
{
|
||||
int value;
|
||||
ssize_t len;
|
||||
char buffer[512];
|
||||
|
||||
/* Drain the notify pipe. For eventfd, only 8 bytes will be read. */
|
||||
value = 0;
|
||||
do {
|
||||
len = read(e->rfd, buffer, sizeof(buffer));
|
||||
value |= (len > 0);
|
||||
} while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
|
||||
|
||||
return value;
|
||||
}
|
59
event_notifier-win32.c
Normal file
59
event_notifier-win32.c
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* event notifier support
|
||||
*
|
||||
* Copyright Red Hat, Inc. 2010
|
||||
*
|
||||
* Authors:
|
||||
* Michael S. Tsirkin <mst@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu-common.h"
|
||||
#include "event_notifier.h"
|
||||
#include "main-loop.h"
|
||||
|
||||
int event_notifier_init(EventNotifier *e, int active)
|
||||
{
|
||||
e->event = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
assert(e->event);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void event_notifier_cleanup(EventNotifier *e)
|
||||
{
|
||||
CloseHandle(e->event);
|
||||
}
|
||||
|
||||
HANDLE event_notifier_get_handle(EventNotifier *e)
|
||||
{
|
||||
return e->event;
|
||||
}
|
||||
|
||||
int event_notifier_set_handler(EventNotifier *e,
|
||||
EventNotifierHandler *handler)
|
||||
{
|
||||
if (handler) {
|
||||
return qemu_add_wait_object(e->event, (IOHandler *)handler, e);
|
||||
} else {
|
||||
qemu_del_wait_object(e->event, (IOHandler *)handler, e);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int event_notifier_set(EventNotifier *e)
|
||||
{
|
||||
SetEvent(e->event);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int event_notifier_test_and_clear(EventNotifier *e)
|
||||
{
|
||||
int ret = WaitForSingleObject(e->event, 0);
|
||||
if (ret == WAIT_OBJECT_0) {
|
||||
ResetEvent(e->event);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
@ -1,67 +0,0 @@
|
||||
/*
|
||||
* event notifier support
|
||||
*
|
||||
* Copyright Red Hat, Inc. 2010
|
||||
*
|
||||
* Authors:
|
||||
* Michael S. Tsirkin <mst@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu-common.h"
|
||||
#include "event_notifier.h"
|
||||
#include "qemu-char.h"
|
||||
|
||||
#ifdef CONFIG_EVENTFD
|
||||
#include <sys/eventfd.h>
|
||||
#endif
|
||||
|
||||
void event_notifier_init_fd(EventNotifier *e, int fd)
|
||||
{
|
||||
e->fd = fd;
|
||||
}
|
||||
|
||||
int event_notifier_init(EventNotifier *e, int active)
|
||||
{
|
||||
#ifdef CONFIG_EVENTFD
|
||||
int fd = eventfd(!!active, EFD_NONBLOCK | EFD_CLOEXEC);
|
||||
if (fd < 0)
|
||||
return -errno;
|
||||
e->fd = fd;
|
||||
return 0;
|
||||
#else
|
||||
return -ENOSYS;
|
||||
#endif
|
||||
}
|
||||
|
||||
void event_notifier_cleanup(EventNotifier *e)
|
||||
{
|
||||
close(e->fd);
|
||||
}
|
||||
|
||||
int event_notifier_get_fd(EventNotifier *e)
|
||||
{
|
||||
return e->fd;
|
||||
}
|
||||
|
||||
int event_notifier_set_handler(EventNotifier *e,
|
||||
EventNotifierHandler *handler)
|
||||
{
|
||||
return qemu_set_fd_handler(e->fd, (IOHandler *)handler, NULL, e);
|
||||
}
|
||||
|
||||
int event_notifier_set(EventNotifier *e)
|
||||
{
|
||||
uint64_t value = 1;
|
||||
int r = write(e->fd, &value, sizeof(value));
|
||||
return r == sizeof(value);
|
||||
}
|
||||
|
||||
int event_notifier_test_and_clear(EventNotifier *e)
|
||||
{
|
||||
uint64_t value;
|
||||
int r = read(e->fd, &value, sizeof(value));
|
||||
return r == sizeof(value);
|
||||
}
|
@ -15,18 +15,32 @@
|
||||
|
||||
#include "qemu-common.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
struct EventNotifier {
|
||||
int fd;
|
||||
#ifdef _WIN32
|
||||
HANDLE event;
|
||||
#else
|
||||
int rfd;
|
||||
int wfd;
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef void EventNotifierHandler(EventNotifier *);
|
||||
|
||||
void event_notifier_init_fd(EventNotifier *, int fd);
|
||||
int event_notifier_init(EventNotifier *, int active);
|
||||
void event_notifier_cleanup(EventNotifier *);
|
||||
int event_notifier_get_fd(EventNotifier *);
|
||||
int event_notifier_set(EventNotifier *);
|
||||
int event_notifier_test_and_clear(EventNotifier *);
|
||||
int event_notifier_set_handler(EventNotifier *, EventNotifierHandler *);
|
||||
|
||||
#ifdef CONFIG_POSIX
|
||||
void event_notifier_init_fd(EventNotifier *, int fd);
|
||||
int event_notifier_get_fd(EventNotifier *);
|
||||
#else
|
||||
HANDLE event_notifier_get_handle(EventNotifier *);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
1
hw/hw.h
1
hw/hw.h
@ -10,6 +10,7 @@
|
||||
|
||||
#include "ioport.h"
|
||||
#include "irq.h"
|
||||
#include "qemu-aio.h"
|
||||
#include "qemu-file.h"
|
||||
#include "vmstate.h"
|
||||
#include "qemu-log.h"
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "qemu-common.h"
|
||||
#include "qemu-char.h"
|
||||
#include "qemu-queue.h"
|
||||
#include "qemu-aio.h"
|
||||
#include "main-loop.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
|
103
iov.c
103
iov.c
@ -251,3 +251,106 @@ unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
|
||||
assert(offset == 0);
|
||||
return j;
|
||||
}
|
||||
|
||||
/* io vectors */
|
||||
|
||||
void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
|
||||
{
|
||||
qiov->iov = g_malloc(alloc_hint * sizeof(struct iovec));
|
||||
qiov->niov = 0;
|
||||
qiov->nalloc = alloc_hint;
|
||||
qiov->size = 0;
|
||||
}
|
||||
|
||||
void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
|
||||
{
|
||||
int i;
|
||||
|
||||
qiov->iov = iov;
|
||||
qiov->niov = niov;
|
||||
qiov->nalloc = -1;
|
||||
qiov->size = 0;
|
||||
for (i = 0; i < niov; i++)
|
||||
qiov->size += iov[i].iov_len;
|
||||
}
|
||||
|
||||
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
|
||||
{
|
||||
assert(qiov->nalloc != -1);
|
||||
|
||||
if (qiov->niov == qiov->nalloc) {
|
||||
qiov->nalloc = 2 * qiov->nalloc + 1;
|
||||
qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec));
|
||||
}
|
||||
qiov->iov[qiov->niov].iov_base = base;
|
||||
qiov->iov[qiov->niov].iov_len = len;
|
||||
qiov->size += len;
|
||||
++qiov->niov;
|
||||
}
|
||||
|
||||
/*
|
||||
* Concatenates (partial) iovecs from src to the end of dst.
|
||||
* It starts copying after skipping `soffset' bytes at the
|
||||
* beginning of src and adds individual vectors from src to
|
||||
* dst copies up to `sbytes' bytes total, or up to the end
|
||||
* of src if it comes first. This way, it is okay to specify
|
||||
* very large value for `sbytes' to indicate "up to the end
|
||||
* of src".
|
||||
* Only vector pointers are processed, not the actual data buffers.
|
||||
*/
|
||||
void qemu_iovec_concat(QEMUIOVector *dst,
|
||||
QEMUIOVector *src, size_t soffset, size_t sbytes)
|
||||
{
|
||||
int i;
|
||||
size_t done;
|
||||
struct iovec *siov = src->iov;
|
||||
assert(dst->nalloc != -1);
|
||||
assert(src->size >= soffset);
|
||||
for (i = 0, done = 0; done < sbytes && i < src->niov; i++) {
|
||||
if (soffset < siov[i].iov_len) {
|
||||
size_t len = MIN(siov[i].iov_len - soffset, sbytes - done);
|
||||
qemu_iovec_add(dst, siov[i].iov_base + soffset, len);
|
||||
done += len;
|
||||
soffset = 0;
|
||||
} else {
|
||||
soffset -= siov[i].iov_len;
|
||||
}
|
||||
}
|
||||
/* return done; */
|
||||
}
|
||||
|
||||
void qemu_iovec_destroy(QEMUIOVector *qiov)
|
||||
{
|
||||
assert(qiov->nalloc != -1);
|
||||
|
||||
qemu_iovec_reset(qiov);
|
||||
g_free(qiov->iov);
|
||||
qiov->nalloc = 0;
|
||||
qiov->iov = NULL;
|
||||
}
|
||||
|
||||
void qemu_iovec_reset(QEMUIOVector *qiov)
|
||||
{
|
||||
assert(qiov->nalloc != -1);
|
||||
|
||||
qiov->niov = 0;
|
||||
qiov->size = 0;
|
||||
}
|
||||
|
||||
size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
|
||||
void *buf, size_t bytes)
|
||||
{
|
||||
return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
|
||||
}
|
||||
|
||||
size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
|
||||
const void *buf, size_t bytes)
|
||||
{
|
||||
return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
|
||||
}
|
||||
|
||||
size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
|
||||
int fillc, size_t bytes)
|
||||
{
|
||||
return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
|
||||
}
|
||||
|
165
main-loop.c
165
main-loop.c
@ -26,75 +26,12 @@
|
||||
#include "qemu-timer.h"
|
||||
#include "slirp/slirp.h"
|
||||
#include "main-loop.h"
|
||||
#include "qemu-aio.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
|
||||
#include "compatfd.h"
|
||||
|
||||
static int io_thread_fd = -1;
|
||||
|
||||
void qemu_notify_event(void)
|
||||
{
|
||||
/* Write 8 bytes to be compatible with eventfd. */
|
||||
static const uint64_t val = 1;
|
||||
ssize_t ret;
|
||||
|
||||
if (io_thread_fd == -1) {
|
||||
return;
|
||||
}
|
||||
do {
|
||||
ret = write(io_thread_fd, &val, sizeof(val));
|
||||
} while (ret < 0 && errno == EINTR);
|
||||
|
||||
/* EAGAIN is fine, a read must be pending. */
|
||||
if (ret < 0 && errno != EAGAIN) {
|
||||
fprintf(stderr, "qemu_notify_event: write() failed: %s\n",
|
||||
strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
static void qemu_event_read(void *opaque)
|
||||
{
|
||||
int fd = (intptr_t)opaque;
|
||||
ssize_t len;
|
||||
char buffer[512];
|
||||
|
||||
/* Drain the notify pipe. For eventfd, only 8 bytes will be read. */
|
||||
do {
|
||||
len = read(fd, buffer, sizeof(buffer));
|
||||
} while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
|
||||
}
|
||||
|
||||
static int qemu_event_init(void)
|
||||
{
|
||||
int err;
|
||||
int fds[2];
|
||||
|
||||
err = qemu_eventfd(fds);
|
||||
if (err == -1) {
|
||||
return -errno;
|
||||
}
|
||||
err = fcntl_setfl(fds[0], O_NONBLOCK);
|
||||
if (err < 0) {
|
||||
goto fail;
|
||||
}
|
||||
err = fcntl_setfl(fds[1], O_NONBLOCK);
|
||||
if (err < 0) {
|
||||
goto fail;
|
||||
}
|
||||
qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
|
||||
(void *)(intptr_t)fds[0]);
|
||||
|
||||
io_thread_fd = fds[1];
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
close(fds[0]);
|
||||
close(fds[1]);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* If we have signalfd, we mask out the signals we want to handle and then
|
||||
* use signalfd to listen for them. We rely on whatever the current signal
|
||||
* handler is to dispatch the signals when we receive them.
|
||||
@ -164,44 +101,29 @@ static int qemu_signal_init(void)
|
||||
|
||||
#else /* _WIN32 */
|
||||
|
||||
static HANDLE qemu_event_handle = NULL;
|
||||
|
||||
static void dummy_event_handler(void *opaque)
|
||||
{
|
||||
}
|
||||
|
||||
static int qemu_event_init(void)
|
||||
{
|
||||
qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
if (!qemu_event_handle) {
|
||||
fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError());
|
||||
return -1;
|
||||
}
|
||||
qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void qemu_notify_event(void)
|
||||
{
|
||||
if (!qemu_event_handle) {
|
||||
return;
|
||||
}
|
||||
if (!SetEvent(qemu_event_handle)) {
|
||||
fprintf(stderr, "qemu_notify_event: SetEvent failed: %ld\n",
|
||||
GetLastError());
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
static int qemu_signal_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
int main_loop_init(void)
|
||||
static AioContext *qemu_aio_context;
|
||||
|
||||
void qemu_notify_event(void)
|
||||
{
|
||||
if (!qemu_aio_context) {
|
||||
return;
|
||||
}
|
||||
aio_notify(qemu_aio_context);
|
||||
}
|
||||
|
||||
int qemu_init_main_loop(void)
|
||||
{
|
||||
int ret;
|
||||
GSource *src;
|
||||
|
||||
init_clocks();
|
||||
init_timer_alarm();
|
||||
|
||||
qemu_mutex_lock_iothread();
|
||||
ret = qemu_signal_init();
|
||||
@ -209,12 +131,10 @@ int main_loop_init(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Note eventfd must be drained before signalfd handlers run */
|
||||
ret = qemu_event_init();
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
qemu_aio_context = aio_context_new();
|
||||
src = aio_get_g_source(qemu_aio_context);
|
||||
g_source_attach(src, NULL);
|
||||
g_source_unref(src);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -400,7 +320,8 @@ void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
|
||||
|
||||
void qemu_fd_register(int fd)
|
||||
{
|
||||
WSAEventSelect(fd, qemu_event_handle, FD_READ | FD_ACCEPT | FD_CLOSE |
|
||||
WSAEventSelect(fd, event_notifier_get_handle(&qemu_aio_context->notifier),
|
||||
FD_READ | FD_ACCEPT | FD_CLOSE |
|
||||
FD_CONNECT | FD_WRITE | FD_OOB);
|
||||
}
|
||||
|
||||
@ -477,8 +398,6 @@ int main_loop_wait(int nonblocking)
|
||||
|
||||
if (nonblocking) {
|
||||
timeout = 0;
|
||||
} else {
|
||||
qemu_bh_update_timeout(&timeout);
|
||||
}
|
||||
|
||||
/* poll any events */
|
||||
@ -501,9 +420,41 @@ int main_loop_wait(int nonblocking)
|
||||
|
||||
qemu_run_all_timers();
|
||||
|
||||
/* Check bottom-halves last in case any of the earlier events triggered
|
||||
them. */
|
||||
qemu_bh_poll();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Functions to operate on the main QEMU AioContext. */
|
||||
|
||||
QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
|
||||
{
|
||||
return aio_bh_new(qemu_aio_context, cb, opaque);
|
||||
}
|
||||
|
||||
void qemu_aio_flush(void)
|
||||
{
|
||||
aio_flush(qemu_aio_context);
|
||||
}
|
||||
|
||||
bool qemu_aio_wait(void)
|
||||
{
|
||||
return aio_poll(qemu_aio_context, true);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_POSIX
|
||||
void qemu_aio_set_fd_handler(int fd,
|
||||
IOHandler *io_read,
|
||||
IOHandler *io_write,
|
||||
AioFlushHandler *io_flush,
|
||||
void *opaque)
|
||||
{
|
||||
aio_set_fd_handler(qemu_aio_context, fd, io_read, io_write, io_flush,
|
||||
opaque);
|
||||
}
|
||||
#endif
|
||||
|
||||
void qemu_aio_set_event_notifier(EventNotifier *notifier,
|
||||
EventNotifierHandler *io_read,
|
||||
AioFlushEventNotifierHandler *io_flush)
|
||||
{
|
||||
aio_set_event_notifier(qemu_aio_context, notifier, io_read, io_flush);
|
||||
}
|
||||
|
66
main-loop.h
66
main-loop.h
@ -25,6 +25,8 @@
|
||||
#ifndef QEMU_MAIN_LOOP_H
|
||||
#define QEMU_MAIN_LOOP_H 1
|
||||
|
||||
#include "qemu-aio.h"
|
||||
|
||||
#define SIG_IPI SIGUSR1
|
||||
|
||||
/**
|
||||
@ -42,16 +44,6 @@
|
||||
*/
|
||||
int qemu_init_main_loop(void);
|
||||
|
||||
/**
|
||||
* main_loop_init: Initializes main loop
|
||||
*
|
||||
* Internal (but shared for compatibility reasons) initialization routine
|
||||
* for the main loop. This should not be used by applications directly,
|
||||
* use qemu_init_main_loop() instead.
|
||||
*
|
||||
*/
|
||||
int main_loop_init(void);
|
||||
|
||||
/**
|
||||
* main_loop_wait: Run one iteration of the main loop.
|
||||
*
|
||||
@ -173,7 +165,6 @@ void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
|
||||
|
||||
typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
|
||||
typedef int IOCanReadHandler(void *opaque);
|
||||
typedef void IOHandler(void *opaque);
|
||||
|
||||
/**
|
||||
* qemu_set_fd_handler2: Register a file descriptor with the main loop
|
||||
@ -254,56 +245,6 @@ int qemu_set_fd_handler(int fd,
|
||||
IOHandler *fd_write,
|
||||
void *opaque);
|
||||
|
||||
typedef struct QEMUBH QEMUBH;
|
||||
typedef void QEMUBHFunc(void *opaque);
|
||||
|
||||
/**
|
||||
* qemu_bh_new: Allocate a new bottom half structure.
|
||||
*
|
||||
* Bottom halves are lightweight callbacks whose invocation is guaranteed
|
||||
* to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
|
||||
* is opaque and must be allocated prior to its use.
|
||||
*/
|
||||
QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
|
||||
|
||||
/**
|
||||
* qemu_bh_schedule: Schedule a bottom half.
|
||||
*
|
||||
* Scheduling a bottom half interrupts the main loop and causes the
|
||||
* execution of the callback that was passed to qemu_bh_new.
|
||||
*
|
||||
* Bottom halves that are scheduled from a bottom half handler are instantly
|
||||
* invoked. This can create an infinite loop if a bottom half handler
|
||||
* schedules itself.
|
||||
*
|
||||
* @bh: The bottom half to be scheduled.
|
||||
*/
|
||||
void qemu_bh_schedule(QEMUBH *bh);
|
||||
|
||||
/**
|
||||
* qemu_bh_cancel: Cancel execution of a bottom half.
|
||||
*
|
||||
* Canceling execution of a bottom half undoes the effect of calls to
|
||||
* qemu_bh_schedule without freeing its resources yet. While cancellation
|
||||
* itself is also wait-free and thread-safe, it can of course race with the
|
||||
* loop that executes bottom halves unless you are holding the iothread
|
||||
* mutex. This makes it mostly useless if you are not holding the mutex.
|
||||
*
|
||||
* @bh: The bottom half to be canceled.
|
||||
*/
|
||||
void qemu_bh_cancel(QEMUBH *bh);
|
||||
|
||||
/**
|
||||
*qemu_bh_delete: Cancel execution of a bottom half and free its resources.
|
||||
*
|
||||
* Deleting a bottom half frees the memory that was allocated for it by
|
||||
* qemu_bh_new. It also implies canceling the bottom half if it was
|
||||
* scheduled.
|
||||
*
|
||||
* @bh: The bottom half to be deleted.
|
||||
*/
|
||||
void qemu_bh_delete(QEMUBH *bh);
|
||||
|
||||
#ifdef CONFIG_POSIX
|
||||
/**
|
||||
* qemu_add_child_watch: Register a child process for reaping.
|
||||
@ -359,8 +300,7 @@ void qemu_fd_register(int fd);
|
||||
void qemu_iohandler_fill(int *pnfds, fd_set *readfds, fd_set *writefds, fd_set *xfds);
|
||||
void qemu_iohandler_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds, int rc);
|
||||
|
||||
QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
|
||||
void qemu_bh_schedule_idle(QEMUBH *bh);
|
||||
int qemu_bh_poll(void);
|
||||
void qemu_bh_update_timeout(uint32_t *timeout);
|
||||
|
||||
#endif
|
||||
|
30
osdep.c
30
osdep.c
@ -134,6 +134,11 @@ fail:
|
||||
errno = serrno;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int qemu_parse_fdset(const char *param)
|
||||
{
|
||||
return qemu_parse_fd(param);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -394,3 +399,28 @@ bool fips_get_state(void)
|
||||
{
|
||||
return fips_enabled;
|
||||
}
|
||||
|
||||
|
||||
static int default_fdset_get_fd(int64_t fdset_id, int flags)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
QEMU_WEAK_ALIAS(monitor_fdset_get_fd, default_fdset_get_fd);
|
||||
|
||||
static int default_fdset_dup_fd_add(int64_t fdset_id, int dup_fd)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
QEMU_WEAK_ALIAS(monitor_fdset_dup_fd_add, default_fdset_dup_fd_add);
|
||||
|
||||
static int default_fdset_dup_fd_remove(int dup_fd)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
QEMU_WEAK_ALIAS(monitor_fdset_dup_fd_remove, default_fdset_dup_fd_remove);
|
||||
|
||||
static int default_fdset_dup_fd_find(int dup_fd)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
QEMU_WEAK_ALIAS(monitor_fdset_dup_fd_find, default_fdset_dup_fd_find);
|
||||
|
@ -61,9 +61,6 @@ static int running_on_valgrind = -1;
|
||||
#ifdef CONFIG_LINUX
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
#ifdef CONFIG_EVENTFD
|
||||
#include <sys/eventfd.h>
|
||||
#endif
|
||||
|
||||
int qemu_get_thread_id(void)
|
||||
{
|
||||
@ -183,34 +180,6 @@ int qemu_pipe(int pipefd[2])
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Creates an eventfd that looks like a pipe and has EFD_CLOEXEC set.
|
||||
*/
|
||||
int qemu_eventfd(int fds[2])
|
||||
{
|
||||
#ifdef CONFIG_EVENTFD
|
||||
int ret;
|
||||
|
||||
ret = eventfd(0, 0);
|
||||
if (ret >= 0) {
|
||||
fds[0] = ret;
|
||||
fds[1] = dup(ret);
|
||||
if (fds[1] == -1) {
|
||||
close(ret);
|
||||
return -1;
|
||||
}
|
||||
qemu_set_cloexec(ret);
|
||||
qemu_set_cloexec(fds[1]);
|
||||
return 0;
|
||||
}
|
||||
if (errno != ENOSYS) {
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
return qemu_pipe(fds);
|
||||
}
|
||||
|
||||
int qemu_utimens(const char *path, const struct timespec *times)
|
||||
{
|
||||
struct timeval tv[2], tv_now;
|
||||
|
@ -150,3 +150,8 @@ int qemu_get_thread_id(void)
|
||||
{
|
||||
return GetCurrentThreadId();
|
||||
}
|
||||
|
||||
static void default_qemu_fd_register(int fd)
|
||||
{
|
||||
}
|
||||
QEMU_WEAK_ALIAS(qemu_fd_register, default_qemu_fd_register);
|
||||
|
@ -1,679 +0,0 @@
|
||||
/*
|
||||
* QEMU posix-aio emulation
|
||||
*
|
||||
* Copyright IBM, Corp. 2008
|
||||
*
|
||||
* Authors:
|
||||
* Anthony Liguori <aliguori@us.ibm.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*
|
||||
* Contributions after 2012-01-13 are licensed under the terms of the
|
||||
* GNU GPL, version 2 or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/types.h>
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <time.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "qemu-queue.h"
|
||||
#include "osdep.h"
|
||||
#include "sysemu.h"
|
||||
#include "qemu-common.h"
|
||||
#include "trace.h"
|
||||
#include "block_int.h"
|
||||
#include "iov.h"
|
||||
|
||||
#include "block/raw-posix-aio.h"
|
||||
|
||||
static void do_spawn_thread(void);
|
||||
|
||||
struct qemu_paiocb {
|
||||
BlockDriverAIOCB common;
|
||||
int aio_fildes;
|
||||
union {
|
||||
struct iovec *aio_iov;
|
||||
void *aio_ioctl_buf;
|
||||
};
|
||||
int aio_niov;
|
||||
size_t aio_nbytes;
|
||||
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
|
||||
off_t aio_offset;
|
||||
|
||||
QTAILQ_ENTRY(qemu_paiocb) node;
|
||||
int aio_type;
|
||||
ssize_t ret;
|
||||
int active;
|
||||
struct qemu_paiocb *next;
|
||||
};
|
||||
|
||||
typedef struct PosixAioState {
|
||||
int rfd, wfd;
|
||||
struct qemu_paiocb *first_aio;
|
||||
} PosixAioState;
|
||||
|
||||
|
||||
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
|
||||
static pthread_t thread_id;
|
||||
static pthread_attr_t attr;
|
||||
static int max_threads = 64;
|
||||
static int cur_threads = 0;
|
||||
static int idle_threads = 0;
|
||||
static int new_threads = 0; /* backlog of threads we need to create */
|
||||
static int pending_threads = 0; /* threads created but not running yet */
|
||||
static QEMUBH *new_thread_bh;
|
||||
static QTAILQ_HEAD(, qemu_paiocb) request_list;
|
||||
|
||||
#ifdef CONFIG_PREADV
|
||||
static int preadv_present = 1;
|
||||
#else
|
||||
static int preadv_present = 0;
|
||||
#endif
|
||||
|
||||
static void die2(int err, const char *what)
|
||||
{
|
||||
fprintf(stderr, "%s failed: %s\n", what, strerror(err));
|
||||
abort();
|
||||
}
|
||||
|
||||
static void die(const char *what)
|
||||
{
|
||||
die2(errno, what);
|
||||
}
|
||||
|
||||
static void mutex_lock(pthread_mutex_t *mutex)
|
||||
{
|
||||
int ret = pthread_mutex_lock(mutex);
|
||||
if (ret) die2(ret, "pthread_mutex_lock");
|
||||
}
|
||||
|
||||
static void mutex_unlock(pthread_mutex_t *mutex)
|
||||
{
|
||||
int ret = pthread_mutex_unlock(mutex);
|
||||
if (ret) die2(ret, "pthread_mutex_unlock");
|
||||
}
|
||||
|
||||
static int cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
|
||||
struct timespec *ts)
|
||||
{
|
||||
int ret = pthread_cond_timedwait(cond, mutex, ts);
|
||||
if (ret && ret != ETIMEDOUT) die2(ret, "pthread_cond_timedwait");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void cond_signal(pthread_cond_t *cond)
|
||||
{
|
||||
int ret = pthread_cond_signal(cond);
|
||||
if (ret) die2(ret, "pthread_cond_signal");
|
||||
}
|
||||
|
||||
static void thread_create(pthread_t *thread, pthread_attr_t *attr,
|
||||
void *(*start_routine)(void*), void *arg)
|
||||
{
|
||||
int ret = pthread_create(thread, attr, start_routine, arg);
|
||||
if (ret) die2(ret, "pthread_create");
|
||||
}
|
||||
|
||||
static ssize_t handle_aiocb_ioctl(struct qemu_paiocb *aiocb)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
|
||||
if (ret == -1)
|
||||
return -errno;
|
||||
|
||||
/*
|
||||
* This looks weird, but the aio code only considers a request
|
||||
* successful if it has written the full number of bytes.
|
||||
*
|
||||
* Now we overload aio_nbytes as aio_ioctl_cmd for the ioctl command,
|
||||
* so in fact we return the ioctl command here to make posix_aio_read()
|
||||
* happy..
|
||||
*/
|
||||
return aiocb->aio_nbytes;
|
||||
}
|
||||
|
||||
static ssize_t handle_aiocb_flush(struct qemu_paiocb *aiocb)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = qemu_fdatasync(aiocb->aio_fildes);
|
||||
if (ret == -1)
|
||||
return -errno;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PREADV
|
||||
|
||||
static ssize_t
|
||||
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
|
||||
{
|
||||
return preadv(fd, iov, nr_iov, offset);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
|
||||
{
|
||||
return pwritev(fd, iov, nr_iov, offset);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static ssize_t
|
||||
qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static ssize_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
|
||||
{
|
||||
ssize_t len;
|
||||
|
||||
do {
|
||||
if (aiocb->aio_type & QEMU_AIO_WRITE)
|
||||
len = qemu_pwritev(aiocb->aio_fildes,
|
||||
aiocb->aio_iov,
|
||||
aiocb->aio_niov,
|
||||
aiocb->aio_offset);
|
||||
else
|
||||
len = qemu_preadv(aiocb->aio_fildes,
|
||||
aiocb->aio_iov,
|
||||
aiocb->aio_niov,
|
||||
aiocb->aio_offset);
|
||||
} while (len == -1 && errno == EINTR);
|
||||
|
||||
if (len == -1)
|
||||
return -errno;
|
||||
return len;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read/writes the data to/from a given linear buffer.
|
||||
*
|
||||
* Returns the number of bytes handles or -errno in case of an error. Short
|
||||
* reads are only returned if the end of the file is reached.
|
||||
*/
|
||||
static ssize_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
|
||||
{
|
||||
ssize_t offset = 0;
|
||||
ssize_t len;
|
||||
|
||||
while (offset < aiocb->aio_nbytes) {
|
||||
if (aiocb->aio_type & QEMU_AIO_WRITE)
|
||||
len = pwrite(aiocb->aio_fildes,
|
||||
(const char *)buf + offset,
|
||||
aiocb->aio_nbytes - offset,
|
||||
aiocb->aio_offset + offset);
|
||||
else
|
||||
len = pread(aiocb->aio_fildes,
|
||||
buf + offset,
|
||||
aiocb->aio_nbytes - offset,
|
||||
aiocb->aio_offset + offset);
|
||||
|
||||
if (len == -1 && errno == EINTR)
|
||||
continue;
|
||||
else if (len == -1) {
|
||||
offset = -errno;
|
||||
break;
|
||||
} else if (len == 0)
|
||||
break;
|
||||
|
||||
offset += len;
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static ssize_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
|
||||
{
|
||||
ssize_t nbytes;
|
||||
char *buf;
|
||||
|
||||
if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
|
||||
/*
|
||||
* If there is just a single buffer, and it is properly aligned
|
||||
* we can just use plain pread/pwrite without any problems.
|
||||
*/
|
||||
if (aiocb->aio_niov == 1)
|
||||
return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
|
||||
|
||||
/*
|
||||
* We have more than one iovec, and all are properly aligned.
|
||||
*
|
||||
* Try preadv/pwritev first and fall back to linearizing the
|
||||
* buffer if it's not supported.
|
||||
*/
|
||||
if (preadv_present) {
|
||||
nbytes = handle_aiocb_rw_vector(aiocb);
|
||||
if (nbytes == aiocb->aio_nbytes)
|
||||
return nbytes;
|
||||
if (nbytes < 0 && nbytes != -ENOSYS)
|
||||
return nbytes;
|
||||
preadv_present = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX(hch): short read/write. no easy way to handle the reminder
|
||||
* using these interfaces. For now retry using plain
|
||||
* pread/pwrite?
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Ok, we have to do it the hard way, copy all segments into
|
||||
* a single aligned buffer.
|
||||
*/
|
||||
buf = qemu_blockalign(aiocb->common.bs, aiocb->aio_nbytes);
|
||||
if (aiocb->aio_type & QEMU_AIO_WRITE) {
|
||||
char *p = buf;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < aiocb->aio_niov; ++i) {
|
||||
memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
|
||||
p += aiocb->aio_iov[i].iov_len;
|
||||
}
|
||||
}
|
||||
|
||||
nbytes = handle_aiocb_rw_linear(aiocb, buf);
|
||||
if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
|
||||
char *p = buf;
|
||||
size_t count = aiocb->aio_nbytes, copy;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < aiocb->aio_niov && count; ++i) {
|
||||
copy = count;
|
||||
if (copy > aiocb->aio_iov[i].iov_len)
|
||||
copy = aiocb->aio_iov[i].iov_len;
|
||||
memcpy(aiocb->aio_iov[i].iov_base, p, copy);
|
||||
p += copy;
|
||||
count -= copy;
|
||||
}
|
||||
}
|
||||
qemu_vfree(buf);
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static void posix_aio_notify_event(void);
|
||||
|
||||
static void *aio_thread(void *unused)
|
||||
{
|
||||
mutex_lock(&lock);
|
||||
pending_threads--;
|
||||
mutex_unlock(&lock);
|
||||
do_spawn_thread();
|
||||
|
||||
while (1) {
|
||||
struct qemu_paiocb *aiocb;
|
||||
ssize_t ret = 0;
|
||||
qemu_timeval tv;
|
||||
struct timespec ts;
|
||||
|
||||
qemu_gettimeofday(&tv);
|
||||
ts.tv_sec = tv.tv_sec + 10;
|
||||
ts.tv_nsec = 0;
|
||||
|
||||
mutex_lock(&lock);
|
||||
|
||||
while (QTAILQ_EMPTY(&request_list) &&
|
||||
!(ret == ETIMEDOUT)) {
|
||||
idle_threads++;
|
||||
ret = cond_timedwait(&cond, &lock, &ts);
|
||||
idle_threads--;
|
||||
}
|
||||
|
||||
if (QTAILQ_EMPTY(&request_list))
|
||||
break;
|
||||
|
||||
aiocb = QTAILQ_FIRST(&request_list);
|
||||
QTAILQ_REMOVE(&request_list, aiocb, node);
|
||||
aiocb->active = 1;
|
||||
mutex_unlock(&lock);
|
||||
|
||||
switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
|
||||
case QEMU_AIO_READ:
|
||||
ret = handle_aiocb_rw(aiocb);
|
||||
if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) {
|
||||
/* A short read means that we have reached EOF. Pad the buffer
|
||||
* with zeros for bytes after EOF. */
|
||||
iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
|
||||
0, aiocb->aio_nbytes - ret);
|
||||
|
||||
ret = aiocb->aio_nbytes;
|
||||
}
|
||||
break;
|
||||
case QEMU_AIO_WRITE:
|
||||
ret = handle_aiocb_rw(aiocb);
|
||||
break;
|
||||
case QEMU_AIO_FLUSH:
|
||||
ret = handle_aiocb_flush(aiocb);
|
||||
break;
|
||||
case QEMU_AIO_IOCTL:
|
||||
ret = handle_aiocb_ioctl(aiocb);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
mutex_lock(&lock);
|
||||
aiocb->ret = ret;
|
||||
mutex_unlock(&lock);
|
||||
|
||||
posix_aio_notify_event();
|
||||
}
|
||||
|
||||
cur_threads--;
|
||||
mutex_unlock(&lock);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void do_spawn_thread(void)
|
||||
{
|
||||
sigset_t set, oldset;
|
||||
|
||||
mutex_lock(&lock);
|
||||
if (!new_threads) {
|
||||
mutex_unlock(&lock);
|
||||
return;
|
||||
}
|
||||
|
||||
new_threads--;
|
||||
pending_threads++;
|
||||
|
||||
mutex_unlock(&lock);
|
||||
|
||||
/* block all signals */
|
||||
if (sigfillset(&set)) die("sigfillset");
|
||||
if (sigprocmask(SIG_SETMASK, &set, &oldset)) die("sigprocmask");
|
||||
|
||||
thread_create(&thread_id, &attr, aio_thread, NULL);
|
||||
|
||||
if (sigprocmask(SIG_SETMASK, &oldset, NULL)) die("sigprocmask restore");
|
||||
}
|
||||
|
||||
static void spawn_thread_bh_fn(void *opaque)
|
||||
{
|
||||
do_spawn_thread();
|
||||
}
|
||||
|
||||
static void spawn_thread(void)
|
||||
{
|
||||
cur_threads++;
|
||||
new_threads++;
|
||||
/* If there are threads being created, they will spawn new workers, so
|
||||
* we don't spend time creating many threads in a loop holding a mutex or
|
||||
* starving the current vcpu.
|
||||
*
|
||||
* If there are no idle threads, ask the main thread to create one, so we
|
||||
* inherit the correct affinity instead of the vcpu affinity.
|
||||
*/
|
||||
if (!pending_threads) {
|
||||
qemu_bh_schedule(new_thread_bh);
|
||||
}
|
||||
}
|
||||
|
||||
static void qemu_paio_submit(struct qemu_paiocb *aiocb)
|
||||
{
|
||||
aiocb->ret = -EINPROGRESS;
|
||||
aiocb->active = 0;
|
||||
mutex_lock(&lock);
|
||||
if (idle_threads == 0 && cur_threads < max_threads)
|
||||
spawn_thread();
|
||||
QTAILQ_INSERT_TAIL(&request_list, aiocb, node);
|
||||
mutex_unlock(&lock);
|
||||
cond_signal(&cond);
|
||||
}
|
||||
|
||||
static ssize_t qemu_paio_return(struct qemu_paiocb *aiocb)
|
||||
{
|
||||
ssize_t ret;
|
||||
|
||||
mutex_lock(&lock);
|
||||
ret = aiocb->ret;
|
||||
mutex_unlock(&lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int qemu_paio_error(struct qemu_paiocb *aiocb)
|
||||
{
|
||||
ssize_t ret = qemu_paio_return(aiocb);
|
||||
|
||||
if (ret < 0)
|
||||
ret = -ret;
|
||||
else
|
||||
ret = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void posix_aio_read(void *opaque)
|
||||
{
|
||||
PosixAioState *s = opaque;
|
||||
struct qemu_paiocb *acb, **pacb;
|
||||
int ret;
|
||||
ssize_t len;
|
||||
|
||||
/* read all bytes from signal pipe */
|
||||
for (;;) {
|
||||
char bytes[16];
|
||||
|
||||
len = read(s->rfd, bytes, sizeof(bytes));
|
||||
if (len == -1 && errno == EINTR)
|
||||
continue; /* try again */
|
||||
if (len == sizeof(bytes))
|
||||
continue; /* more to read */
|
||||
break;
|
||||
}
|
||||
|
||||
for(;;) {
|
||||
pacb = &s->first_aio;
|
||||
for(;;) {
|
||||
acb = *pacb;
|
||||
if (!acb)
|
||||
return;
|
||||
|
||||
ret = qemu_paio_error(acb);
|
||||
if (ret == ECANCELED) {
|
||||
/* remove the request */
|
||||
*pacb = acb->next;
|
||||
qemu_aio_release(acb);
|
||||
} else if (ret != EINPROGRESS) {
|
||||
/* end of aio */
|
||||
if (ret == 0) {
|
||||
ret = qemu_paio_return(acb);
|
||||
if (ret == acb->aio_nbytes)
|
||||
ret = 0;
|
||||
else
|
||||
ret = -EINVAL;
|
||||
} else {
|
||||
ret = -ret;
|
||||
}
|
||||
|
||||
trace_paio_complete(acb, acb->common.opaque, ret);
|
||||
|
||||
/* remove the request */
|
||||
*pacb = acb->next;
|
||||
/* call the callback */
|
||||
acb->common.cb(acb->common.opaque, ret);
|
||||
qemu_aio_release(acb);
|
||||
break;
|
||||
} else {
|
||||
pacb = &acb->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int posix_aio_flush(void *opaque)
|
||||
{
|
||||
PosixAioState *s = opaque;
|
||||
return !!s->first_aio;
|
||||
}
|
||||
|
||||
static PosixAioState *posix_aio_state;
|
||||
|
||||
static void posix_aio_notify_event(void)
|
||||
{
|
||||
char byte = 0;
|
||||
ssize_t ret;
|
||||
|
||||
ret = write(posix_aio_state->wfd, &byte, sizeof(byte));
|
||||
if (ret < 0 && errno != EAGAIN)
|
||||
die("write()");
|
||||
}
|
||||
|
||||
static void paio_remove(struct qemu_paiocb *acb)
|
||||
{
|
||||
struct qemu_paiocb **pacb;
|
||||
|
||||
/* remove the callback from the queue */
|
||||
pacb = &posix_aio_state->first_aio;
|
||||
for(;;) {
|
||||
if (*pacb == NULL) {
|
||||
fprintf(stderr, "paio_remove: aio request not found!\n");
|
||||
break;
|
||||
} else if (*pacb == acb) {
|
||||
*pacb = acb->next;
|
||||
qemu_aio_release(acb);
|
||||
break;
|
||||
}
|
||||
pacb = &(*pacb)->next;
|
||||
}
|
||||
}
|
||||
|
||||
static void paio_cancel(BlockDriverAIOCB *blockacb)
|
||||
{
|
||||
struct qemu_paiocb *acb = (struct qemu_paiocb *)blockacb;
|
||||
int active = 0;
|
||||
|
||||
trace_paio_cancel(acb, acb->common.opaque);
|
||||
|
||||
mutex_lock(&lock);
|
||||
if (!acb->active) {
|
||||
QTAILQ_REMOVE(&request_list, acb, node);
|
||||
acb->ret = -ECANCELED;
|
||||
} else if (acb->ret == -EINPROGRESS) {
|
||||
active = 1;
|
||||
}
|
||||
mutex_unlock(&lock);
|
||||
|
||||
if (active) {
|
||||
/* fail safe: if the aio could not be canceled, we wait for
|
||||
it */
|
||||
while (qemu_paio_error(acb) == EINPROGRESS)
|
||||
;
|
||||
}
|
||||
|
||||
paio_remove(acb);
|
||||
}
|
||||
|
||||
static AIOPool raw_aio_pool = {
|
||||
.aiocb_size = sizeof(struct qemu_paiocb),
|
||||
.cancel = paio_cancel,
|
||||
};
|
||||
|
||||
BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
|
||||
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
|
||||
BlockDriverCompletionFunc *cb, void *opaque, int type)
|
||||
{
|
||||
struct qemu_paiocb *acb;
|
||||
|
||||
acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
|
||||
acb->aio_type = type;
|
||||
acb->aio_fildes = fd;
|
||||
|
||||
if (qiov) {
|
||||
acb->aio_iov = qiov->iov;
|
||||
acb->aio_niov = qiov->niov;
|
||||
}
|
||||
acb->aio_nbytes = nb_sectors * 512;
|
||||
acb->aio_offset = sector_num * 512;
|
||||
|
||||
acb->next = posix_aio_state->first_aio;
|
||||
posix_aio_state->first_aio = acb;
|
||||
|
||||
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
|
||||
qemu_paio_submit(acb);
|
||||
return &acb->common;
|
||||
}
|
||||
|
||||
BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
|
||||
unsigned long int req, void *buf,
|
||||
BlockDriverCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
struct qemu_paiocb *acb;
|
||||
|
||||
acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
|
||||
acb->aio_type = QEMU_AIO_IOCTL;
|
||||
acb->aio_fildes = fd;
|
||||
acb->aio_offset = 0;
|
||||
acb->aio_ioctl_buf = buf;
|
||||
acb->aio_ioctl_cmd = req;
|
||||
|
||||
acb->next = posix_aio_state->first_aio;
|
||||
posix_aio_state->first_aio = acb;
|
||||
|
||||
qemu_paio_submit(acb);
|
||||
return &acb->common;
|
||||
}
|
||||
|
||||
int paio_init(void)
|
||||
{
|
||||
PosixAioState *s;
|
||||
int fds[2];
|
||||
int ret;
|
||||
|
||||
if (posix_aio_state)
|
||||
return 0;
|
||||
|
||||
s = g_malloc(sizeof(PosixAioState));
|
||||
|
||||
s->first_aio = NULL;
|
||||
if (qemu_pipe(fds) == -1) {
|
||||
fprintf(stderr, "failed to create pipe\n");
|
||||
g_free(s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
s->rfd = fds[0];
|
||||
s->wfd = fds[1];
|
||||
|
||||
fcntl(s->rfd, F_SETFL, O_NONBLOCK);
|
||||
fcntl(s->wfd, F_SETFL, O_NONBLOCK);
|
||||
|
||||
qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
|
||||
|
||||
ret = pthread_attr_init(&attr);
|
||||
if (ret)
|
||||
die2(ret, "pthread_attr_init");
|
||||
|
||||
ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
|
||||
if (ret)
|
||||
die2(ret, "pthread_attr_setdetachstate");
|
||||
|
||||
QTAILQ_INIT(&request_list);
|
||||
new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL);
|
||||
|
||||
posix_aio_state = s;
|
||||
return 0;
|
||||
}
|
@ -1,3 +1,5 @@
|
||||
qapi-obj-y = qapi-visit-core.o qapi-dealloc-visitor.o qmp-input-visitor.o
|
||||
qapi-obj-y += qmp-output-visitor.o qmp-registry.o qmp-dispatch.o
|
||||
qapi-obj-y += string-input-visitor.o string-output-visitor.o opts-visitor.o
|
||||
qapi-obj-y += string-input-visitor.o string-output-visitor.o
|
||||
|
||||
common-obj-y += opts-visitor.o
|
||||
|
206
qemu-aio.h
206
qemu-aio.h
@ -15,7 +15,8 @@
|
||||
#define QEMU_AIO_H
|
||||
|
||||
#include "qemu-common.h"
|
||||
#include "qemu-char.h"
|
||||
#include "qemu-queue.h"
|
||||
#include "event_notifier.h"
|
||||
|
||||
typedef struct BlockDriverAIOCB BlockDriverAIOCB;
|
||||
typedef void BlockDriverCompletionFunc(void *opaque, int ret);
|
||||
@ -38,20 +39,162 @@ void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
|
||||
BlockDriverCompletionFunc *cb, void *opaque);
|
||||
void qemu_aio_release(void *p);
|
||||
|
||||
typedef struct AioHandler AioHandler;
|
||||
typedef void QEMUBHFunc(void *opaque);
|
||||
typedef void IOHandler(void *opaque);
|
||||
|
||||
typedef struct AioContext {
|
||||
GSource source;
|
||||
|
||||
/* The list of registered AIO handlers */
|
||||
QLIST_HEAD(, AioHandler) aio_handlers;
|
||||
|
||||
/* This is a simple lock used to protect the aio_handlers list.
|
||||
* Specifically, it's used to ensure that no callbacks are removed while
|
||||
* we're walking and dispatching callbacks.
|
||||
*/
|
||||
int walking_handlers;
|
||||
|
||||
/* Anchor of the list of Bottom Halves belonging to the context */
|
||||
struct QEMUBH *first_bh;
|
||||
|
||||
/* A simple lock used to protect the first_bh list, and ensure that
|
||||
* no callbacks are removed while we're walking and dispatching callbacks.
|
||||
*/
|
||||
int walking_bh;
|
||||
|
||||
/* Used for aio_notify. */
|
||||
EventNotifier notifier;
|
||||
} AioContext;
|
||||
|
||||
/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
|
||||
typedef int (AioFlushHandler)(void *opaque);
|
||||
typedef int (AioFlushEventNotifierHandler)(EventNotifier *e);
|
||||
|
||||
/**
|
||||
* aio_context_new: Allocate a new AioContext.
|
||||
*
|
||||
* AioContext provide a mini event-loop that can be waited on synchronously.
|
||||
* They also provide bottom halves, a service to execute a piece of code
|
||||
* as soon as possible.
|
||||
*/
|
||||
AioContext *aio_context_new(void);
|
||||
|
||||
/**
|
||||
* aio_context_ref:
|
||||
* @ctx: The AioContext to operate on.
|
||||
*
|
||||
* Add a reference to an AioContext.
|
||||
*/
|
||||
void aio_context_ref(AioContext *ctx);
|
||||
|
||||
/**
|
||||
* aio_context_unref:
|
||||
* @ctx: The AioContext to operate on.
|
||||
*
|
||||
* Drop a reference to an AioContext.
|
||||
*/
|
||||
void aio_context_unref(AioContext *ctx);
|
||||
|
||||
/**
|
||||
* aio_bh_new: Allocate a new bottom half structure.
|
||||
*
|
||||
* Bottom halves are lightweight callbacks whose invocation is guaranteed
|
||||
* to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
|
||||
* is opaque and must be allocated prior to its use.
|
||||
*/
|
||||
QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
|
||||
|
||||
/**
|
||||
* aio_notify: Force processing of pending events.
|
||||
*
|
||||
* Similar to signaling a condition variable, aio_notify forces
|
||||
* aio_wait to exit, so that the next call will re-examine pending events.
|
||||
* The caller of aio_notify will usually call aio_wait again very soon,
|
||||
* or go through another iteration of the GLib main loop. Hence, aio_notify
|
||||
* also has the side effect of recalculating the sets of file descriptors
|
||||
* that the main loop waits for.
|
||||
*
|
||||
* Calling aio_notify is rarely necessary, because for example scheduling
|
||||
* a bottom half calls it already.
|
||||
*/
|
||||
void aio_notify(AioContext *ctx);
|
||||
|
||||
/**
|
||||
* aio_bh_poll: Poll bottom halves for an AioContext.
|
||||
*
|
||||
* These are internal functions used by the QEMU main loop.
|
||||
*/
|
||||
int aio_bh_poll(AioContext *ctx);
|
||||
|
||||
/**
|
||||
* qemu_bh_schedule: Schedule a bottom half.
|
||||
*
|
||||
* Scheduling a bottom half interrupts the main loop and causes the
|
||||
* execution of the callback that was passed to qemu_bh_new.
|
||||
*
|
||||
* Bottom halves that are scheduled from a bottom half handler are instantly
|
||||
* invoked. This can create an infinite loop if a bottom half handler
|
||||
* schedules itself.
|
||||
*
|
||||
* @bh: The bottom half to be scheduled.
|
||||
*/
|
||||
void qemu_bh_schedule(QEMUBH *bh);
|
||||
|
||||
/**
|
||||
* qemu_bh_cancel: Cancel execution of a bottom half.
|
||||
*
|
||||
* Canceling execution of a bottom half undoes the effect of calls to
|
||||
* qemu_bh_schedule without freeing its resources yet. While cancellation
|
||||
* itself is also wait-free and thread-safe, it can of course race with the
|
||||
* loop that executes bottom halves unless you are holding the iothread
|
||||
* mutex. This makes it mostly useless if you are not holding the mutex.
|
||||
*
|
||||
* @bh: The bottom half to be canceled.
|
||||
*/
|
||||
void qemu_bh_cancel(QEMUBH *bh);
|
||||
|
||||
/**
|
||||
*qemu_bh_delete: Cancel execution of a bottom half and free its resources.
|
||||
*
|
||||
* Deleting a bottom half frees the memory that was allocated for it by
|
||||
* qemu_bh_new. It also implies canceling the bottom half if it was
|
||||
* scheduled.
|
||||
*
|
||||
* @bh: The bottom half to be deleted.
|
||||
*/
|
||||
void qemu_bh_delete(QEMUBH *bh);
|
||||
|
||||
/* Flush any pending AIO operation. This function will block until all
|
||||
* outstanding AIO operations have been completed or cancelled. */
|
||||
void qemu_aio_flush(void);
|
||||
void aio_flush(AioContext *ctx);
|
||||
|
||||
/* Wait for a single AIO completion to occur. This function will wait
|
||||
* until a single AIO event has completed and it will ensure something
|
||||
* has moved before returning. This can issue new pending aio as
|
||||
* result of executing I/O completion or bh callbacks.
|
||||
/* Return whether there are any pending callbacks from the GSource
|
||||
* attached to the AioContext.
|
||||
*
|
||||
* Return whether there is still any pending AIO operation. */
|
||||
bool qemu_aio_wait(void);
|
||||
* This is used internally in the implementation of the GSource.
|
||||
*/
|
||||
bool aio_pending(AioContext *ctx);
|
||||
|
||||
/* Progress in completing AIO work to occur. This can issue new pending
|
||||
* aio as a result of executing I/O completion or bh callbacks.
|
||||
*
|
||||
* If there is no pending AIO operation or completion (bottom half),
|
||||
* return false. If there are pending bottom halves, return true.
|
||||
*
|
||||
* If there are no pending bottom halves, but there are pending AIO
|
||||
* operations, it may not be possible to make any progress without
|
||||
* blocking. If @blocking is true, this function will wait until one
|
||||
* or more AIO events have completed, to ensure something has moved
|
||||
* before returning.
|
||||
*
|
||||
* If @blocking is false, this function will also return false if the
|
||||
* function cannot make any progress without blocking.
|
||||
*/
|
||||
bool aio_poll(AioContext *ctx, bool blocking);
|
||||
|
||||
#ifdef CONFIG_POSIX
|
||||
/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
|
||||
typedef int (AioFlushHandler)(void *opaque);
|
||||
|
||||
/* Register a file descriptor and associated callbacks. Behaves very similarly
|
||||
* to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
|
||||
@ -60,10 +203,45 @@ bool qemu_aio_wait(void);
|
||||
* Code that invokes AIO completion functions should rely on this function
|
||||
* instead of qemu_set_fd_handler[2].
|
||||
*/
|
||||
int qemu_aio_set_fd_handler(int fd,
|
||||
IOHandler *io_read,
|
||||
IOHandler *io_write,
|
||||
AioFlushHandler *io_flush,
|
||||
void *opaque);
|
||||
void aio_set_fd_handler(AioContext *ctx,
|
||||
int fd,
|
||||
IOHandler *io_read,
|
||||
IOHandler *io_write,
|
||||
AioFlushHandler *io_flush,
|
||||
void *opaque);
|
||||
#endif
|
||||
|
||||
/* Register an event notifier and associated callbacks. Behaves very similarly
|
||||
* to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
|
||||
* will be invoked when using either qemu_aio_wait() or qemu_aio_flush().
|
||||
*
|
||||
* Code that invokes AIO completion functions should rely on this function
|
||||
* instead of event_notifier_set_handler.
|
||||
*/
|
||||
void aio_set_event_notifier(AioContext *ctx,
|
||||
EventNotifier *notifier,
|
||||
EventNotifierHandler *io_read,
|
||||
AioFlushEventNotifierHandler *io_flush);
|
||||
|
||||
/* Return a GSource that lets the main loop poll the file descriptors attached
|
||||
* to this AioContext.
|
||||
*/
|
||||
GSource *aio_get_g_source(AioContext *ctx);
|
||||
|
||||
/* Functions to operate on the main QEMU AioContext. */
|
||||
|
||||
void qemu_aio_flush(void);
|
||||
bool qemu_aio_wait(void);
|
||||
void qemu_aio_set_event_notifier(EventNotifier *notifier,
|
||||
EventNotifierHandler *io_read,
|
||||
AioFlushEventNotifierHandler *io_flush);
|
||||
|
||||
#ifdef CONFIG_POSIX
|
||||
void qemu_aio_set_fd_handler(int fd,
|
||||
IOHandler *io_read,
|
||||
IOHandler *io_write,
|
||||
AioFlushHandler *io_flush,
|
||||
void *opaque);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "qemu-queue.h"
|
||||
#include "qemu-option.h"
|
||||
#include "qemu-config.h"
|
||||
#include "qemu-aio.h"
|
||||
#include "qobject.h"
|
||||
#include "qstring.h"
|
||||
#include "main-loop.h"
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
typedef struct QEMUTimer QEMUTimer;
|
||||
typedef struct QEMUFile QEMUFile;
|
||||
typedef struct QEMUBH QEMUBH;
|
||||
typedef struct DeviceState DeviceState;
|
||||
|
||||
struct Monitor;
|
||||
@ -167,7 +168,6 @@ int qemu_fls(int i);
|
||||
int qemu_fdatasync(int fd);
|
||||
int fcntl_setfl(int fd, int flag);
|
||||
int qemu_parse_fd(const char *param);
|
||||
int qemu_parse_fdset(const char *param);
|
||||
|
||||
/*
|
||||
* strtosz() suffixes used to specify the default treatment of an
|
||||
@ -218,7 +218,6 @@ ssize_t qemu_recv_full(int fd, void *buf, size_t count, int flags)
|
||||
QEMU_WARN_UNUSED_RESULT;
|
||||
|
||||
#ifndef _WIN32
|
||||
int qemu_eventfd(int pipefd[2]);
|
||||
int qemu_pipe(int pipefd[2]);
|
||||
#endif
|
||||
|
||||
|
@ -26,7 +26,7 @@
|
||||
#include "qemu-coroutine.h"
|
||||
#include "qemu-coroutine-int.h"
|
||||
#include "qemu-queue.h"
|
||||
#include "main-loop.h"
|
||||
#include "qemu-aio.h"
|
||||
#include "trace.h"
|
||||
|
||||
static QTAILQ_HEAD(, Coroutine) unlock_bh_queue =
|
||||
|
@ -28,7 +28,6 @@
|
||||
|
||||
#include <windows.h>
|
||||
#include <winsock2.h>
|
||||
#include "main-loop.h"
|
||||
|
||||
/* Workaround for older versions of MinGW. */
|
||||
#ifndef ECONNREFUSED
|
||||
|
@ -967,3 +967,21 @@ int socket_init(void)
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int default_monitor_get_fd(Monitor *mon, const char *name, Error **errp)
|
||||
{
|
||||
error_setg(errp, "only QEMU supports file descriptor passing");
|
||||
return -1;
|
||||
}
|
||||
QEMU_WEAK_ALIAS(monitor_get_fd, default_monitor_get_fd);
|
||||
|
||||
static int default_qemu_set_fd_handler2(int fd,
|
||||
IOCanReadHandler *fd_read_poll,
|
||||
IOHandler *fd_read,
|
||||
IOHandler *fd_write,
|
||||
void *opaque)
|
||||
|
||||
{
|
||||
abort();
|
||||
}
|
||||
QEMU_WEAK_ALIAS(qemu_set_fd_handler2, default_qemu_set_fd_handler2);
|
||||
|
@ -17,6 +17,9 @@
|
||||
#include <signal.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/time.h>
|
||||
#include "qemu-thread.h"
|
||||
|
||||
static void error_exit(int err, const char *msg)
|
||||
@ -115,6 +118,83 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
|
||||
error_exit(err, __func__);
|
||||
}
|
||||
|
||||
void qemu_sem_init(QemuSemaphore *sem, int init)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = sem_init(&sem->sem, 0, init);
|
||||
if (rc < 0) {
|
||||
error_exit(errno, __func__);
|
||||
}
|
||||
}
|
||||
|
||||
void qemu_sem_destroy(QemuSemaphore *sem)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = sem_destroy(&sem->sem);
|
||||
if (rc < 0) {
|
||||
error_exit(errno, __func__);
|
||||
}
|
||||
}
|
||||
|
||||
void qemu_sem_post(QemuSemaphore *sem)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = sem_post(&sem->sem);
|
||||
if (rc < 0) {
|
||||
error_exit(errno, __func__);
|
||||
}
|
||||
}
|
||||
|
||||
int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (ms <= 0) {
|
||||
/* This is cheaper than sem_timedwait. */
|
||||
do {
|
||||
rc = sem_trywait(&sem->sem);
|
||||
} while (rc == -1 && errno == EINTR);
|
||||
if (rc == -1 && errno == EAGAIN) {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
struct timeval tv;
|
||||
struct timespec ts;
|
||||
gettimeofday(&tv, NULL);
|
||||
ts.tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000;
|
||||
ts.tv_sec = tv.tv_sec + ms / 1000;
|
||||
if (ts.tv_nsec >= 1000000000) {
|
||||
ts.tv_sec++;
|
||||
ts.tv_nsec -= 1000000000;
|
||||
}
|
||||
do {
|
||||
rc = sem_timedwait(&sem->sem, &ts);
|
||||
} while (rc == -1 && errno == EINTR);
|
||||
if (rc == -1 && errno == ETIMEDOUT) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (rc < 0) {
|
||||
error_exit(errno, __func__);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void qemu_sem_wait(QemuSemaphore *sem)
|
||||
{
|
||||
int rc;
|
||||
|
||||
do {
|
||||
rc = sem_wait(&sem->sem);
|
||||
} while (rc == -1 && errno == EINTR);
|
||||
if (rc < 0) {
|
||||
error_exit(errno, __func__);
|
||||
}
|
||||
}
|
||||
|
||||
void qemu_thread_create(QemuThread *thread,
|
||||
void *(*start_routine)(void*),
|
||||
void *arg, int mode)
|
||||
|
@ -1,6 +1,7 @@
|
||||
#ifndef __QEMU_THREAD_POSIX_H
|
||||
#define __QEMU_THREAD_POSIX_H 1
|
||||
#include "pthread.h"
|
||||
#include <semaphore.h>
|
||||
|
||||
struct QemuMutex {
|
||||
pthread_mutex_t lock;
|
||||
@ -10,6 +11,10 @@ struct QemuCond {
|
||||
pthread_cond_t cond;
|
||||
};
|
||||
|
||||
struct QemuSemaphore {
|
||||
sem_t sem;
|
||||
};
|
||||
|
||||
struct QemuThread {
|
||||
pthread_t thread;
|
||||
};
|
||||
|
@ -192,6 +192,41 @@ void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
|
||||
qemu_mutex_lock(mutex);
|
||||
}
|
||||
|
||||
void qemu_sem_init(QemuSemaphore *sem, int init)
|
||||
{
|
||||
/* Manual reset. */
|
||||
sem->sema = CreateSemaphore(NULL, init, LONG_MAX, NULL);
|
||||
}
|
||||
|
||||
void qemu_sem_destroy(QemuSemaphore *sem)
|
||||
{
|
||||
CloseHandle(sem->sema);
|
||||
}
|
||||
|
||||
void qemu_sem_post(QemuSemaphore *sem)
|
||||
{
|
||||
ReleaseSemaphore(sem->sema, 1, NULL);
|
||||
}
|
||||
|
||||
int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
|
||||
{
|
||||
int rc = WaitForSingleObject(sem->sema, ms);
|
||||
if (rc == WAIT_OBJECT_0) {
|
||||
return 0;
|
||||
}
|
||||
if (rc != WAIT_TIMEOUT) {
|
||||
error_exit(GetLastError(), __func__);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void qemu_sem_wait(QemuSemaphore *sem)
|
||||
{
|
||||
if (WaitForSingleObject(sem->sema, INFINITE) != WAIT_OBJECT_0) {
|
||||
error_exit(GetLastError(), __func__);
|
||||
}
|
||||
}
|
||||
|
||||
struct QemuThreadData {
|
||||
/* Passed to win32_start_routine. */
|
||||
void *(*start_routine)(void *);
|
||||
|
@ -13,6 +13,10 @@ struct QemuCond {
|
||||
HANDLE continue_event;
|
||||
};
|
||||
|
||||
struct QemuSemaphore {
|
||||
HANDLE sema;
|
||||
};
|
||||
|
||||
typedef struct QemuThreadData QemuThreadData;
|
||||
struct QemuThread {
|
||||
QemuThreadData *data;
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
typedef struct QemuMutex QemuMutex;
|
||||
typedef struct QemuCond QemuCond;
|
||||
typedef struct QemuSemaphore QemuSemaphore;
|
||||
typedef struct QemuThread QemuThread;
|
||||
|
||||
#ifdef _WIN32
|
||||
@ -38,6 +39,12 @@ void qemu_cond_signal(QemuCond *cond);
|
||||
void qemu_cond_broadcast(QemuCond *cond);
|
||||
void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex);
|
||||
|
||||
void qemu_sem_init(QemuSemaphore *sem, int init);
|
||||
void qemu_sem_post(QemuSemaphore *sem);
|
||||
void qemu_sem_wait(QemuSemaphore *sem);
|
||||
int qemu_sem_timedwait(QemuSemaphore *sem, int ms);
|
||||
void qemu_sem_destroy(QemuSemaphore *sem);
|
||||
|
||||
void qemu_thread_create(QemuThread *thread,
|
||||
void *(*start_routine)(void *),
|
||||
void *arg, int mode);
|
||||
|
12
qemu-timer.c
12
qemu-timer.c
@ -430,9 +430,11 @@ void qemu_unregister_clock_reset_notifier(QEMUClock *clock, Notifier *notifier)
|
||||
|
||||
void init_clocks(void)
|
||||
{
|
||||
rt_clock = qemu_new_clock(QEMU_CLOCK_REALTIME);
|
||||
vm_clock = qemu_new_clock(QEMU_CLOCK_VIRTUAL);
|
||||
host_clock = qemu_new_clock(QEMU_CLOCK_HOST);
|
||||
if (!rt_clock) {
|
||||
rt_clock = qemu_new_clock(QEMU_CLOCK_REALTIME);
|
||||
vm_clock = qemu_new_clock(QEMU_CLOCK_VIRTUAL);
|
||||
host_clock = qemu_new_clock(QEMU_CLOCK_HOST);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t qemu_timer_expire_time_ns(QEMUTimer *ts)
|
||||
@ -745,6 +747,10 @@ int init_timer_alarm(void)
|
||||
struct qemu_alarm_timer *t = NULL;
|
||||
int i, err = -1;
|
||||
|
||||
if (alarm_timer) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 0; alarm_timers[i].name; i++) {
|
||||
t = &alarm_timers[i];
|
||||
|
||||
|
35
qemu-tool.c
35
qemu-tool.c
@ -38,12 +38,6 @@ const char *qemu_get_vm_name(void)
|
||||
|
||||
Monitor *cur_mon;
|
||||
|
||||
int monitor_get_fd(Monitor *mon, const char *name, Error **errp)
|
||||
{
|
||||
error_setg(errp, "only QEMU supports file descriptor passing");
|
||||
return -1;
|
||||
}
|
||||
|
||||
void vm_stop(RunState state)
|
||||
{
|
||||
abort();
|
||||
@ -74,29 +68,9 @@ void monitor_protocol_event(MonitorEvent event, QObject *data)
|
||||
{
|
||||
}
|
||||
|
||||
int monitor_fdset_get_fd(int64_t fdset_id, int flags)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
int monitor_fdset_dup_fd_remove(int dup_fd)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
int monitor_fdset_dup_fd_find(int dup_fd)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
int64_t cpu_get_clock(void)
|
||||
{
|
||||
return qemu_get_clock_ns(rt_clock);
|
||||
return get_clock_realtime();
|
||||
}
|
||||
|
||||
int64_t cpu_get_icount(void)
|
||||
@ -118,13 +92,6 @@ void qemu_clock_warp(QEMUClock *clock)
|
||||
{
|
||||
}
|
||||
|
||||
int qemu_init_main_loop(void)
|
||||
{
|
||||
init_clocks();
|
||||
init_timer_alarm();
|
||||
return main_loop_init();
|
||||
}
|
||||
|
||||
void slirp_update_timeout(uint32_t *timeout)
|
||||
{
|
||||
}
|
||||
|
20
qemu-user.c
20
qemu-user.c
@ -35,23 +35,3 @@ void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
|
||||
void monitor_set_error(Monitor *mon, QError *qerror)
|
||||
{
|
||||
}
|
||||
|
||||
int monitor_fdset_get_fd(int64_t fdset_id, int flags)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
int monitor_fdset_dup_fd_remove(int dup_fd)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
int monitor_fdset_dup_fd_find(int dup_fd)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
3
qmp.c
3
qmp.c
@ -471,11 +471,12 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename,
|
||||
return prop_list;
|
||||
}
|
||||
|
||||
CpuDefinitionInfoList GCC_WEAK *arch_query_cpu_definitions(Error **errp)
|
||||
static CpuDefinitionInfoList *default_arch_query_cpu_definitions(Error **errp)
|
||||
{
|
||||
error_set(errp, QERR_NOT_SUPPORTED);
|
||||
return NULL;
|
||||
}
|
||||
QEMU_WEAK_ALIAS(arch_query_cpu_definitions, default_arch_query_cpu_definitions);
|
||||
|
||||
CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
|
||||
{
|
||||
|
@ -36,7 +36,7 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \
|
||||
tests/test-qmp-input-visitor.o tests/test-qmp-input-strict.o \
|
||||
tests/test-qmp-commands.o tests/test-visitor-serialization.o
|
||||
|
||||
test-qapi-obj-y = $(qobject-obj-y) $(qapi-obj-y) $(tools-obj-y)
|
||||
test-qapi-obj-y = $(qobject-obj-y) $(qapi-obj-y) qemu-tool.o
|
||||
test-qapi-obj-y += tests/test-qapi-visit.o tests/test-qapi-types.o
|
||||
test-qapi-obj-y += module.o
|
||||
|
||||
@ -47,8 +47,8 @@ tests/check-qstring$(EXESUF): tests/check-qstring.o qstring.o
|
||||
tests/check-qdict$(EXESUF): tests/check-qdict.o qdict.o qfloat.o qint.o qstring.o qbool.o qlist.o
|
||||
tests/check-qlist$(EXESUF): tests/check-qlist.o qlist.o qint.o
|
||||
tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o
|
||||
tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) $(tools-obj-y)
|
||||
tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y)
|
||||
tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) qemu-tool.o
|
||||
tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y) $(block-obj-y) iov.o
|
||||
tests/test-iov$(EXESUF): tests/test-iov.o iov.o
|
||||
|
||||
tests/test-qapi-types.c tests/test-qapi-types.h :\
|
||||
@ -81,7 +81,7 @@ TARGETS=$(patsubst %-softmmu,%, $(filter %-softmmu,$(TARGET_DIRS)))
|
||||
QTEST_TARGETS=$(foreach TARGET,$(TARGETS), $(if $(check-qtest-$(TARGET)-y), $(TARGET),))
|
||||
check-qtest-$(CONFIG_POSIX)=$(foreach TARGET,$(TARGETS), $(check-qtest-$(TARGET)-y))
|
||||
|
||||
qtest-obj-y = tests/libqtest.o $(oslib-obj-y) $(tools-obj-y)
|
||||
qtest-obj-y = tests/libqtest.o $(oslib-obj-y)
|
||||
$(check-qtest-y): $(qtest-obj-y)
|
||||
|
||||
.PHONY: check-help
|
||||
|
289
thread-pool.c
Normal file
289
thread-pool.c
Normal file
@ -0,0 +1,289 @@
|
||||
/*
|
||||
* QEMU block layer thread pool
|
||||
*
|
||||
* Copyright IBM, Corp. 2008
|
||||
* Copyright Red Hat, Inc. 2012
|
||||
*
|
||||
* Authors:
|
||||
* Anthony Liguori <aliguori@us.ibm.com>
|
||||
* Paolo Bonzini <pbonzini@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*
|
||||
* Contributions after 2012-01-13 are licensed under the terms of the
|
||||
* GNU GPL, version 2 or (at your option) any later version.
|
||||
*/
|
||||
#include "qemu-common.h"
|
||||
#include "qemu-queue.h"
|
||||
#include "qemu-thread.h"
|
||||
#include "osdep.h"
|
||||
#include "qemu-coroutine.h"
|
||||
#include "trace.h"
|
||||
#include "block_int.h"
|
||||
#include "event_notifier.h"
|
||||
#include "thread-pool.h"
|
||||
|
||||
static void do_spawn_thread(void);
|
||||
|
||||
typedef struct ThreadPoolElement ThreadPoolElement;
|
||||
|
||||
enum ThreadState {
|
||||
THREAD_QUEUED,
|
||||
THREAD_ACTIVE,
|
||||
THREAD_DONE,
|
||||
THREAD_CANCELED,
|
||||
};
|
||||
|
||||
struct ThreadPoolElement {
|
||||
BlockDriverAIOCB common;
|
||||
ThreadPoolFunc *func;
|
||||
void *arg;
|
||||
|
||||
/* Moving state out of THREAD_QUEUED is protected by lock. After
|
||||
* that, only the worker thread can write to it. Reads and writes
|
||||
* of state and ret are ordered with memory barriers.
|
||||
*/
|
||||
enum ThreadState state;
|
||||
int ret;
|
||||
|
||||
/* Access to this list is protected by lock. */
|
||||
QTAILQ_ENTRY(ThreadPoolElement) reqs;
|
||||
|
||||
/* Access to this list is protected by the global mutex. */
|
||||
QLIST_ENTRY(ThreadPoolElement) all;
|
||||
};
|
||||
|
||||
static EventNotifier notifier;
|
||||
static QemuMutex lock;
|
||||
static QemuCond check_cancel;
|
||||
static QemuSemaphore sem;
|
||||
static int max_threads = 64;
|
||||
static QEMUBH *new_thread_bh;
|
||||
|
||||
/* The following variables are protected by the global mutex. */
|
||||
static QLIST_HEAD(, ThreadPoolElement) head;
|
||||
|
||||
/* The following variables are protected by lock. */
|
||||
static QTAILQ_HEAD(, ThreadPoolElement) request_list;
|
||||
static int cur_threads;
|
||||
static int idle_threads;
|
||||
static int new_threads; /* backlog of threads we need to create */
|
||||
static int pending_threads; /* threads created but not running yet */
|
||||
static int pending_cancellations; /* whether we need a cond_broadcast */
|
||||
|
||||
static void *worker_thread(void *unused)
|
||||
{
|
||||
qemu_mutex_lock(&lock);
|
||||
pending_threads--;
|
||||
do_spawn_thread();
|
||||
|
||||
while (1) {
|
||||
ThreadPoolElement *req;
|
||||
int ret;
|
||||
|
||||
do {
|
||||
idle_threads++;
|
||||
qemu_mutex_unlock(&lock);
|
||||
ret = qemu_sem_timedwait(&sem, 10000);
|
||||
qemu_mutex_lock(&lock);
|
||||
idle_threads--;
|
||||
} while (ret == -1 && !QTAILQ_EMPTY(&request_list));
|
||||
if (ret == -1) {
|
||||
break;
|
||||
}
|
||||
|
||||
req = QTAILQ_FIRST(&request_list);
|
||||
QTAILQ_REMOVE(&request_list, req, reqs);
|
||||
req->state = THREAD_ACTIVE;
|
||||
qemu_mutex_unlock(&lock);
|
||||
|
||||
ret = req->func(req->arg);
|
||||
|
||||
req->ret = ret;
|
||||
/* Write ret before state. */
|
||||
smp_wmb();
|
||||
req->state = THREAD_DONE;
|
||||
|
||||
qemu_mutex_lock(&lock);
|
||||
if (pending_cancellations) {
|
||||
qemu_cond_broadcast(&check_cancel);
|
||||
}
|
||||
|
||||
event_notifier_set(¬ifier);
|
||||
}
|
||||
|
||||
cur_threads--;
|
||||
qemu_mutex_unlock(&lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void do_spawn_thread(void)
|
||||
{
|
||||
QemuThread t;
|
||||
|
||||
/* Runs with lock taken. */
|
||||
if (!new_threads) {
|
||||
return;
|
||||
}
|
||||
|
||||
new_threads--;
|
||||
pending_threads++;
|
||||
|
||||
qemu_thread_create(&t, worker_thread, NULL, QEMU_THREAD_DETACHED);
|
||||
}
|
||||
|
||||
static void spawn_thread_bh_fn(void *opaque)
|
||||
{
|
||||
qemu_mutex_lock(&lock);
|
||||
do_spawn_thread();
|
||||
qemu_mutex_unlock(&lock);
|
||||
}
|
||||
|
||||
static void spawn_thread(void)
|
||||
{
|
||||
cur_threads++;
|
||||
new_threads++;
|
||||
/* If there are threads being created, they will spawn new workers, so
|
||||
* we don't spend time creating many threads in a loop holding a mutex or
|
||||
* starving the current vcpu.
|
||||
*
|
||||
* If there are no idle threads, ask the main thread to create one, so we
|
||||
* inherit the correct affinity instead of the vcpu affinity.
|
||||
*/
|
||||
if (!pending_threads) {
|
||||
qemu_bh_schedule(new_thread_bh);
|
||||
}
|
||||
}
|
||||
|
||||
static void event_notifier_ready(EventNotifier *notifier)
|
||||
{
|
||||
ThreadPoolElement *elem, *next;
|
||||
|
||||
event_notifier_test_and_clear(notifier);
|
||||
restart:
|
||||
QLIST_FOREACH_SAFE(elem, &head, all, next) {
|
||||
if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
|
||||
continue;
|
||||
}
|
||||
if (elem->state == THREAD_DONE) {
|
||||
trace_thread_pool_complete(elem, elem->common.opaque, elem->ret);
|
||||
}
|
||||
if (elem->state == THREAD_DONE && elem->common.cb) {
|
||||
QLIST_REMOVE(elem, all);
|
||||
/* Read state before ret. */
|
||||
smp_rmb();
|
||||
elem->common.cb(elem->common.opaque, elem->ret);
|
||||
qemu_aio_release(elem);
|
||||
goto restart;
|
||||
} else {
|
||||
/* remove the request */
|
||||
QLIST_REMOVE(elem, all);
|
||||
qemu_aio_release(elem);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int thread_pool_active(EventNotifier *notifier)
|
||||
{
|
||||
return !QLIST_EMPTY(&head);
|
||||
}
|
||||
|
||||
static void thread_pool_cancel(BlockDriverAIOCB *acb)
|
||||
{
|
||||
ThreadPoolElement *elem = (ThreadPoolElement *)acb;
|
||||
|
||||
trace_thread_pool_cancel(elem, elem->common.opaque);
|
||||
|
||||
qemu_mutex_lock(&lock);
|
||||
if (elem->state == THREAD_QUEUED &&
|
||||
/* No thread has yet started working on elem. we can try to "steal"
|
||||
* the item from the worker if we can get a signal from the
|
||||
* semaphore. Because this is non-blocking, we can do it with
|
||||
* the lock taken and ensure that elem will remain THREAD_QUEUED.
|
||||
*/
|
||||
qemu_sem_timedwait(&sem, 0) == 0) {
|
||||
QTAILQ_REMOVE(&request_list, elem, reqs);
|
||||
elem->state = THREAD_CANCELED;
|
||||
event_notifier_set(¬ifier);
|
||||
} else {
|
||||
pending_cancellations++;
|
||||
while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
|
||||
qemu_cond_wait(&check_cancel, &lock);
|
||||
}
|
||||
pending_cancellations--;
|
||||
}
|
||||
qemu_mutex_unlock(&lock);
|
||||
}
|
||||
|
||||
static AIOPool thread_pool_cb_pool = {
|
||||
.aiocb_size = sizeof(ThreadPoolElement),
|
||||
.cancel = thread_pool_cancel,
|
||||
};
|
||||
|
||||
BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
|
||||
BlockDriverCompletionFunc *cb, void *opaque)
|
||||
{
|
||||
ThreadPoolElement *req;
|
||||
|
||||
req = qemu_aio_get(&thread_pool_cb_pool, NULL, cb, opaque);
|
||||
req->func = func;
|
||||
req->arg = arg;
|
||||
req->state = THREAD_QUEUED;
|
||||
|
||||
QLIST_INSERT_HEAD(&head, req, all);
|
||||
|
||||
trace_thread_pool_submit(req, arg);
|
||||
|
||||
qemu_mutex_lock(&lock);
|
||||
if (idle_threads == 0 && cur_threads < max_threads) {
|
||||
spawn_thread();
|
||||
}
|
||||
QTAILQ_INSERT_TAIL(&request_list, req, reqs);
|
||||
qemu_mutex_unlock(&lock);
|
||||
qemu_sem_post(&sem);
|
||||
return &req->common;
|
||||
}
|
||||
|
||||
typedef struct ThreadPoolCo {
|
||||
Coroutine *co;
|
||||
int ret;
|
||||
} ThreadPoolCo;
|
||||
|
||||
static void thread_pool_co_cb(void *opaque, int ret)
|
||||
{
|
||||
ThreadPoolCo *co = opaque;
|
||||
|
||||
co->ret = ret;
|
||||
qemu_coroutine_enter(co->co, NULL);
|
||||
}
|
||||
|
||||
int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg)
|
||||
{
|
||||
ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS };
|
||||
assert(qemu_in_coroutine());
|
||||
thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc);
|
||||
qemu_coroutine_yield();
|
||||
return tpc.ret;
|
||||
}
|
||||
|
||||
void thread_pool_submit(ThreadPoolFunc *func, void *arg)
|
||||
{
|
||||
thread_pool_submit_aio(func, arg, NULL, NULL);
|
||||
}
|
||||
|
||||
static void thread_pool_init(void)
|
||||
{
|
||||
QLIST_INIT(&head);
|
||||
event_notifier_init(¬ifier, false);
|
||||
qemu_mutex_init(&lock);
|
||||
qemu_cond_init(&check_cancel);
|
||||
qemu_sem_init(&sem, 0);
|
||||
qemu_aio_set_event_notifier(¬ifier, event_notifier_ready,
|
||||
thread_pool_active);
|
||||
|
||||
QTAILQ_INIT(&request_list);
|
||||
new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL);
|
||||
}
|
||||
|
||||
block_init(thread_pool_init)
|
34
thread-pool.h
Normal file
34
thread-pool.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* QEMU block layer thread pool
|
||||
*
|
||||
* Copyright IBM, Corp. 2008
|
||||
* Copyright Red Hat, Inc. 2012
|
||||
*
|
||||
* Authors:
|
||||
* Anthony Liguori <aliguori@us.ibm.com>
|
||||
* Paolo Bonzini <pbonzini@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2. See
|
||||
* the COPYING file in the top-level directory.
|
||||
*
|
||||
* Contributions after 2012-01-13 are licensed under the terms of the
|
||||
* GNU GPL, version 2 or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#ifndef QEMU_THREAD_POOL_H
|
||||
#define QEMU_THREAD_POOL_H 1
|
||||
|
||||
#include "qemu-common.h"
|
||||
#include "qemu-queue.h"
|
||||
#include "qemu-thread.h"
|
||||
#include "qemu-coroutine.h"
|
||||
#include "block_int.h"
|
||||
|
||||
typedef int ThreadPoolFunc(void *opaque);
|
||||
|
||||
BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
|
||||
BlockDriverCompletionFunc *cb, void *opaque);
|
||||
int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg);
|
||||
void thread_pool_submit(ThreadPoolFunc *func, void *arg);
|
||||
|
||||
#endif
|
@ -98,6 +98,11 @@ virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
|
||||
virtio_blk_handle_write(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
|
||||
virtio_blk_handle_read(void *req, uint64_t sector, size_t nsectors) "req %p sector %"PRIu64" nsectors %zu"
|
||||
|
||||
# thread-pool.c
|
||||
thread_pool_submit(void *req, void *opaque) "req %p opaque %p"
|
||||
thread_pool_complete(void *req, void *opaque, int ret) "req %p opaque %p ret %d"
|
||||
thread_pool_cancel(void *req, void *opaque) "req %p opaque %p"
|
||||
|
||||
# posix-aio-compat.c
|
||||
paio_submit(void *acb, void *opaque, int64_t sector_num, int nb_sectors, int type) "acb %p opaque %p sector_num %"PRId64" nb_sectors %d type %d"
|
||||
paio_complete(void *acb, void *opaque, int ret) "acb %p opaque %p ret %d"
|
||||
|
17
vl.c
17
vl.c
@ -2441,11 +2441,6 @@ static void free_and_trace(gpointer mem)
|
||||
free(mem);
|
||||
}
|
||||
|
||||
int qemu_init_main_loop(void)
|
||||
{
|
||||
return main_loop_init();
|
||||
}
|
||||
|
||||
int main(int argc, char **argv, char **envp)
|
||||
{
|
||||
int i;
|
||||
@ -3451,6 +3446,12 @@ int main(int argc, char **argv, char **envp)
|
||||
}
|
||||
loc_set_none();
|
||||
|
||||
qemu_init_cpu_loop();
|
||||
if (qemu_init_main_loop()) {
|
||||
fprintf(stderr, "qemu_init_main_loop failed\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (qemu_opts_foreach(qemu_find_opts("sandbox"), parse_sandbox, NULL, 0)) {
|
||||
exit(1);
|
||||
}
|
||||
@ -3618,12 +3619,6 @@ int main(int argc, char **argv, char **envp)
|
||||
|
||||
configure_accelerator();
|
||||
|
||||
qemu_init_cpu_loop();
|
||||
if (qemu_init_main_loop()) {
|
||||
fprintf(stderr, "qemu_init_main_loop failed\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
|
||||
if (machine_opts) {
|
||||
kernel_filename = qemu_opt_get(machine_opts, "kernel");
|
||||
|
Loading…
Reference in New Issue
Block a user