raw-posix: refactor AIO support

Currently the raw-posix.c code contains a lot of knowledge about the
asynchronous I/O scheme that is mostly implemented in posix-aio-compat.c.
All this code does not really belong here and is getting a bit in the
way of implementing native AIO on Linux.

So instead move all the guts of the AIO implementation into
posix-aio-compat.c (which might need a better name, btw).

There's now a very small interface between the AIO providers and raw-posix.c:

 - an init routine is called from raw_open_common to return an AIO context
   for this drive.  An AIO implementation may either re-use one context
   for all drives, or use a different one for each as the Linux native
   AIO support will do.
 - a submit routine is called from the aio_readv/writev methods to submit
   an AIO request

There are no indirect calls involved in this interface as we need to
decide which one to call manually.  We will only call the Linux AIO native
init function if we were requested to by vl.c, and we will only call
the native submit function if we are asked to and the request is properly
aligned.  That's also the reason why the alignment check actually does
the inverse move and now goes into raw-posix.c.

The old posix-aio-compat.h header is removed now that most of its
content is private to posix-aio-compat.c, and instead a new
block/raw-posix-aio.h header is created containing only the tiny interface
between raw-posix.c and the AIO implementation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
Christoph Hellwig 2009-08-20 16:58:19 +02:00 committed by Anthony Liguori
parent bf0cb498c5
commit 9ef91a6771
4 changed files with 328 additions and 378 deletions

36
block/raw-posix-aio.h Normal file
View File

@ -0,0 +1,36 @@
/*
* QEMU Posix block I/O backend AIO support
*
* Copyright IBM, Corp. 2008
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
#ifndef QEMU_RAW_POSIX_AIO_H
#define QEMU_RAW_POSIX_AIO_H
/*
 * AIO request types.
 *
 * Each request carries one of these in its "type" word.  They are distinct
 * bits so that flag bits (see below) can be OR'ed into the same word; the
 * AIO implementation masks with QEMU_AIO_TYPE_MASK before dispatching on
 * the request type.
 */
#define QEMU_AIO_READ 0x0001
#define QEMU_AIO_WRITE 0x0002
#define QEMU_AIO_IOCTL 0x0004
#define QEMU_AIO_TYPE_MASK \
(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL)
/*
 * AIO flags, OR'ed into the request type.
 *
 * QEMU_AIO_MISALIGNED: set by raw-posix.c when the drive uses an aligned
 * bounce buffer (O_DIRECT) and at least one element of the I/O vector is
 * not 512-byte aligned.  The AIO implementation then copies the data
 * through a single aligned buffer instead of using the vector directly.
 */
#define QEMU_AIO_MISALIGNED 0x1000
/* posix-aio-compat.c - thread pool based implementation */

/*
 * Set up the AIO implementation and return its context.
 *
 * The context is shared: repeated calls return the same pointer.
 * Returns NULL if the notification pipe cannot be created.
 */
void *paio_init(void);

/*
 * Queue a vectored read or write on fd.
 *
 * aio_ctx    - context returned by paio_init()
 * sector_num - start of the transfer, in 512-byte sectors
 * nb_sectors - length of the transfer, in 512-byte sectors
 * type       - QEMU_AIO_READ or QEMU_AIO_WRITE, optionally OR'ed with
 *              QEMU_AIO_MISALIGNED
 *
 * Returns the control block of the queued request, or NULL if no control
 * block could be allocated.  cb is invoked with opaque and the result
 * (0 on success, negative errno on failure) once the request completes.
 */
BlockDriverAIOCB *paio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type);

/*
 * Queue an asynchronous ioctl (request "req" with argument "buf") on fd
 * as a QEMU_AIO_IOCTL request.
 *
 * Returns the control block of the queued request, or NULL if no control
 * block could be allocated.
 */
BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
unsigned long int req, void *buf,
BlockDriverCompletionFunc *cb, void *opaque);
#endif /* QEMU_RAW_POSIX_AIO_H */

View File

@ -27,7 +27,7 @@
#include "qemu-log.h" #include "qemu-log.h"
#include "block_int.h" #include "block_int.h"
#include "module.h" #include "module.h"
#include "posix-aio-compat.h" #include "block/raw-posix-aio.h"
#ifdef CONFIG_COCOA #ifdef CONFIG_COCOA
#include <paths.h> #include <paths.h>
@ -107,6 +107,7 @@ typedef struct BDRVRawState {
int type; int type;
unsigned int lseek_err_cnt; unsigned int lseek_err_cnt;
int open_flags; int open_flags;
void *aio_ctx;
#if defined(__linux__) #if defined(__linux__)
/* linux floppy specific */ /* linux floppy specific */
int64_t fd_open_time; int64_t fd_open_time;
@ -117,8 +118,6 @@ typedef struct BDRVRawState {
uint8_t* aligned_buf; uint8_t* aligned_buf;
} BDRVRawState; } BDRVRawState;
static int posix_aio_init(void);
static int fd_open(BlockDriverState *bs); static int fd_open(BlockDriverState *bs);
static int64_t raw_getlength(BlockDriverState *bs); static int64_t raw_getlength(BlockDriverState *bs);
@ -132,8 +131,6 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
BDRVRawState *s = bs->opaque; BDRVRawState *s = bs->opaque;
int fd, ret; int fd, ret;
posix_aio_init();
s->lseek_err_cnt = 0; s->lseek_err_cnt = 0;
s->open_flags = open_flags | O_BINARY; s->open_flags = open_flags | O_BINARY;
@ -165,12 +162,22 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
if ((bdrv_flags & BDRV_O_NOCACHE)) { if ((bdrv_flags & BDRV_O_NOCACHE)) {
s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE); s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
if (s->aligned_buf == NULL) { if (s->aligned_buf == NULL) {
ret = -errno; goto out_close;
close(fd);
return ret;
} }
} }
s->aio_ctx = paio_init();
if (!s->aio_ctx) {
goto out_free_buf;
}
return 0; return 0;
out_free_buf:
qemu_vfree(s->aligned_buf);
out_close:
close(fd);
return -errno;
} }
static int raw_open(BlockDriverState *bs, const char *filename, int flags) static int raw_open(BlockDriverState *bs, const char *filename, int flags)
@ -487,240 +494,58 @@ static int raw_write(BlockDriverState *bs, int64_t sector_num,
return ret; return ret;
} }
/***********************************************************/ /*
/* Unix AIO using POSIX AIO */ * Check if all memory in this vector is sector aligned.
*/
typedef struct RawAIOCB { static int qiov_is_aligned(QEMUIOVector *qiov)
BlockDriverAIOCB common;
struct qemu_paiocb aiocb;
struct RawAIOCB *next;
int ret;
} RawAIOCB;
typedef struct PosixAioState
{ {
int rfd, wfd; int i;
RawAIOCB *first_aio;
} PosixAioState;
static void posix_aio_read(void *opaque)
{
PosixAioState *s = opaque;
RawAIOCB *acb, **pacb;
int ret;
ssize_t len;
/* read all bytes from signal pipe */
for (;;) {
char bytes[16];
len = read(s->rfd, bytes, sizeof(bytes));
if (len == -1 && errno == EINTR)
continue; /* try again */
if (len == sizeof(bytes))
continue; /* more to read */
break;
}
for(;;) {
pacb = &s->first_aio;
for(;;) {
acb = *pacb;
if (!acb)
goto the_end;
ret = qemu_paio_error(&acb->aiocb);
if (ret == ECANCELED) {
/* remove the request */
*pacb = acb->next;
qemu_aio_release(acb);
} else if (ret != EINPROGRESS) {
/* end of aio */
if (ret == 0) {
ret = qemu_paio_return(&acb->aiocb);
if (ret == acb->aiocb.aio_nbytes)
ret = 0;
else
ret = -EINVAL;
} else {
ret = -ret;
}
/* remove the request */
*pacb = acb->next;
/* call the callback */
acb->common.cb(acb->common.opaque, ret);
qemu_aio_release(acb);
break;
} else {
pacb = &acb->next;
}
}
}
the_end: ;
}
static int posix_aio_flush(void *opaque)
{
PosixAioState *s = opaque;
return !!s->first_aio;
}
static PosixAioState *posix_aio_state;
static void aio_signal_handler(int signum)
{
if (posix_aio_state) {
char byte = 0;
write(posix_aio_state->wfd, &byte, sizeof(byte));
}
qemu_service_io();
}
static int posix_aio_init(void)
{
struct sigaction act;
PosixAioState *s;
int fds[2];
struct qemu_paioinit ai;
if (posix_aio_state)
return 0;
s = qemu_malloc(sizeof(PosixAioState));
sigfillset(&act.sa_mask);
act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
act.sa_handler = aio_signal_handler;
sigaction(SIGUSR2, &act, NULL);
s->first_aio = NULL;
if (pipe(fds) == -1) {
fprintf(stderr, "failed to create pipe\n");
return -errno;
}
s->rfd = fds[0];
s->wfd = fds[1];
fcntl(s->rfd, F_SETFL, O_NONBLOCK);
fcntl(s->wfd, F_SETFL, O_NONBLOCK);
qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
memset(&ai, 0, sizeof(ai));
ai.aio_threads = 64;
ai.aio_num = 64;
qemu_paio_init(&ai);
posix_aio_state = s;
for (i = 0; i < qiov->niov; i++) {
if ((uintptr_t) qiov->iov[i].iov_base % 512) {
return 0; return 0;
} }
static void raw_aio_remove(RawAIOCB *acb)
{
RawAIOCB **pacb;
/* remove the callback from the queue */
pacb = &posix_aio_state->first_aio;
for(;;) {
if (*pacb == NULL) {
fprintf(stderr, "raw_aio_remove: aio request not found!\n");
break;
} else if (*pacb == acb) {
*pacb = acb->next;
qemu_aio_release(acb);
break;
}
pacb = &(*pacb)->next;
}
} }
static void raw_aio_cancel(BlockDriverAIOCB *blockacb) return 1;
{
int ret;
RawAIOCB *acb = (RawAIOCB *)blockacb;
ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
if (ret == QEMU_PAIO_NOTCANCELED) {
/* fail safe: if the aio could not be canceled, we wait for
it */
while (qemu_paio_error(&acb->aiocb) == EINPROGRESS);
} }
raw_aio_remove(acb); static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
} int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type)
static AIOPool raw_aio_pool = {
.aiocb_size = sizeof(RawAIOCB),
.cancel = raw_aio_cancel,
};
static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{ {
BDRVRawState *s = bs->opaque; BDRVRawState *s = bs->opaque;
RawAIOCB *acb;
if (fd_open(bs) < 0) if (fd_open(bs) < 0)
return NULL; return NULL;
acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
if (!acb)
return NULL;
acb->aiocb.aio_fildes = s->fd;
acb->aiocb.ev_signo = SIGUSR2;
acb->aiocb.aio_iov = qiov->iov;
acb->aiocb.aio_niov = qiov->niov;
acb->aiocb.aio_nbytes = nb_sectors * 512;
acb->aiocb.aio_offset = sector_num * 512;
acb->aiocb.aio_flags = 0;
/* /*
* If O_DIRECT is used the buffer needs to be aligned on a sector * If O_DIRECT is used the buffer needs to be aligned on a sector
* boundary. Tell the low level code to ensure that in case it's * boundary. Check if this is the case or telll the low-level
* not done yet. * driver that it needs to copy the buffer.
*/ */
if (s->aligned_buf) if (s->aligned_buf && !qiov_is_aligned(qiov)) {
acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED; type |= QEMU_AIO_MISALIGNED;
}
acb->next = posix_aio_state->first_aio; return paio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov, nb_sectors,
posix_aio_state->first_aio = acb; cb, opaque, type);
return acb;
} }
static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs, static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque) BlockDriverCompletionFunc *cb, void *opaque)
{ {
RawAIOCB *acb; return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
cb, opaque, QEMU_AIO_READ);
acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
if (!acb)
return NULL;
if (qemu_paio_read(&acb->aiocb) < 0) {
raw_aio_remove(acb);
return NULL;
}
return &acb->common;
} }
static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs, static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque) BlockDriverCompletionFunc *cb, void *opaque)
{ {
RawAIOCB *acb; return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
cb, opaque, QEMU_AIO_WRITE);
acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
if (!acb)
return NULL;
if (qemu_paio_write(&acb->aiocb) < 0) {
raw_aio_remove(acb);
return NULL;
}
return &acb->common;
} }
static void raw_close(BlockDriverState *bs) static void raw_close(BlockDriverState *bs)
@ -1085,30 +910,10 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
BlockDriverCompletionFunc *cb, void *opaque) BlockDriverCompletionFunc *cb, void *opaque)
{ {
BDRVRawState *s = bs->opaque; BDRVRawState *s = bs->opaque;
RawAIOCB *acb;
if (fd_open(bs) < 0) if (fd_open(bs) < 0)
return NULL; return NULL;
return paio_ioctl(bs, s->fd, req, buf, cb, opaque);
acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
if (!acb)
return NULL;
acb->aiocb.aio_fildes = s->fd;
acb->aiocb.ev_signo = SIGUSR2;
acb->aiocb.aio_offset = 0;
acb->aiocb.aio_flags = 0;
acb->next = posix_aio_state->first_aio;
posix_aio_state->first_aio = acb;
acb->aiocb.aio_ioctl_buf = buf;
acb->aiocb.aio_ioctl_cmd = req;
if (qemu_paio_ioctl(&acb->aiocb) < 0) {
raw_aio_remove(acb);
return NULL;
}
return &acb->common;
} }
#elif defined(__FreeBSD__) #elif defined(__FreeBSD__)
@ -1189,8 +994,6 @@ static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
BDRVRawState *s = bs->opaque; BDRVRawState *s = bs->opaque;
int ret; int ret;
posix_aio_init();
s->type = FTYPE_FD; s->type = FTYPE_FD;
/* open will not fail even if no floppy is inserted, so add O_NONBLOCK */ /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */

View File

@ -12,17 +12,49 @@
*/ */
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <sys/types.h>
#include <pthread.h> #include <pthread.h>
#include <unistd.h> #include <unistd.h>
#include <errno.h> #include <errno.h>
#include <time.h> #include <time.h>
#include <signal.h>
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include "sys-queue.h"
#include "osdep.h" #include "osdep.h"
#include "qemu-common.h" #include "qemu-common.h"
#include "block_int.h"
#include "block/raw-posix-aio.h"
struct qemu_paiocb {
BlockDriverAIOCB common;
int aio_fildes;
union {
struct iovec *aio_iov;
void *aio_ioctl_buf;
};
int aio_niov;
size_t aio_nbytes;
#define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */
int ev_signo;
off_t aio_offset;
TAILQ_ENTRY(qemu_paiocb) node;
int aio_type;
ssize_t ret;
int active;
struct qemu_paiocb *next;
};
typedef struct PosixAioState {
int rfd, wfd;
struct qemu_paiocb *first_aio;
} PosixAioState;
#include "posix-aio-compat.h"
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
@ -132,30 +164,13 @@ qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
#endif #endif
/*
* Check if we need to copy the data in the aiocb into a new
* properly aligned buffer.
*/
static int aiocb_needs_copy(struct qemu_paiocb *aiocb)
{
if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) {
int i;
for (i = 0; i < aiocb->aio_niov; i++)
if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512)
return 1;
}
return 0;
}
static size_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb) static size_t handle_aiocb_rw_vector(struct qemu_paiocb *aiocb)
{ {
size_t offset = 0; size_t offset = 0;
ssize_t len; ssize_t len;
do { do {
if (aiocb->aio_type == QEMU_PAIO_WRITE) if (aiocb->aio_type & QEMU_AIO_WRITE)
len = qemu_pwritev(aiocb->aio_fildes, len = qemu_pwritev(aiocb->aio_fildes,
aiocb->aio_iov, aiocb->aio_iov,
aiocb->aio_niov, aiocb->aio_niov,
@ -178,7 +193,7 @@ static size_t handle_aiocb_rw_linear(struct qemu_paiocb *aiocb, char *buf)
size_t len; size_t len;
while (offset < aiocb->aio_nbytes) { while (offset < aiocb->aio_nbytes) {
if (aiocb->aio_type == QEMU_PAIO_WRITE) if (aiocb->aio_type & QEMU_AIO_WRITE)
len = pwrite(aiocb->aio_fildes, len = pwrite(aiocb->aio_fildes,
(const char *)buf + offset, (const char *)buf + offset,
aiocb->aio_nbytes - offset, aiocb->aio_nbytes - offset,
@ -208,7 +223,7 @@ static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
size_t nbytes; size_t nbytes;
char *buf; char *buf;
if (!aiocb_needs_copy(aiocb)) { if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
/* /*
* If there is just a single buffer, and it is properly aligned * If there is just a single buffer, and it is properly aligned
* we can just use plain pread/pwrite without any problems. * we can just use plain pread/pwrite without any problems.
@ -243,7 +258,7 @@ static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
* a single aligned buffer. * a single aligned buffer.
*/ */
buf = qemu_memalign(512, aiocb->aio_nbytes); buf = qemu_memalign(512, aiocb->aio_nbytes);
if (aiocb->aio_type == QEMU_PAIO_WRITE) { if (aiocb->aio_type & QEMU_AIO_WRITE) {
char *p = buf; char *p = buf;
int i; int i;
@ -254,7 +269,7 @@ static size_t handle_aiocb_rw(struct qemu_paiocb *aiocb)
} }
nbytes = handle_aiocb_rw_linear(aiocb, buf); nbytes = handle_aiocb_rw_linear(aiocb, buf);
if (aiocb->aio_type != QEMU_PAIO_WRITE) { if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
char *p = buf; char *p = buf;
size_t count = aiocb->aio_nbytes, copy; size_t count = aiocb->aio_nbytes, copy;
int i; int i;
@ -310,12 +325,12 @@ static void *aio_thread(void *unused)
idle_threads--; idle_threads--;
mutex_unlock(&lock); mutex_unlock(&lock);
switch (aiocb->aio_type) { switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
case QEMU_PAIO_READ: case QEMU_AIO_READ:
case QEMU_PAIO_WRITE: case QEMU_AIO_WRITE:
ret = handle_aiocb_rw(aiocb); ret = handle_aiocb_rw(aiocb);
break; break;
case QEMU_PAIO_IOCTL: case QEMU_AIO_IOCTL:
ret = handle_aiocb_ioctl(aiocb); ret = handle_aiocb_ioctl(aiocb);
break; break;
default: default:
@ -346,24 +361,8 @@ static void spawn_thread(void)
thread_create(&thread_id, &attr, aio_thread, NULL); thread_create(&thread_id, &attr, aio_thread, NULL);
} }
int qemu_paio_init(struct qemu_paioinit *aioinit) static void qemu_paio_submit(struct qemu_paiocb *aiocb)
{ {
int ret;
ret = pthread_attr_init(&attr);
if (ret) die2(ret, "pthread_attr_init");
ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
if (ret) die2(ret, "pthread_attr_setdetachstate");
TAILQ_INIT(&request_list);
return 0;
}
static int qemu_paio_submit(struct qemu_paiocb *aiocb, int type)
{
aiocb->aio_type = type;
aiocb->ret = -EINPROGRESS; aiocb->ret = -EINPROGRESS;
aiocb->active = 0; aiocb->active = 0;
mutex_lock(&lock); mutex_lock(&lock);
@ -372,26 +371,9 @@ static int qemu_paio_submit(struct qemu_paiocb *aiocb, int type)
TAILQ_INSERT_TAIL(&request_list, aiocb, node); TAILQ_INSERT_TAIL(&request_list, aiocb, node);
mutex_unlock(&lock); mutex_unlock(&lock);
cond_signal(&cond); cond_signal(&cond);
return 0;
} }
int qemu_paio_read(struct qemu_paiocb *aiocb) static ssize_t qemu_paio_return(struct qemu_paiocb *aiocb)
{
return qemu_paio_submit(aiocb, QEMU_PAIO_READ);
}
int qemu_paio_write(struct qemu_paiocb *aiocb)
{
return qemu_paio_submit(aiocb, QEMU_PAIO_WRITE);
}
int qemu_paio_ioctl(struct qemu_paiocb *aiocb)
{
return qemu_paio_submit(aiocb, QEMU_PAIO_IOCTL);
}
ssize_t qemu_paio_return(struct qemu_paiocb *aiocb)
{ {
ssize_t ret; ssize_t ret;
@ -402,7 +384,7 @@ ssize_t qemu_paio_return(struct qemu_paiocb *aiocb)
return ret; return ret;
} }
int qemu_paio_error(struct qemu_paiocb *aiocb) static int qemu_paio_error(struct qemu_paiocb *aiocb)
{ {
ssize_t ret = qemu_paio_return(aiocb); ssize_t ret = qemu_paio_return(aiocb);
@ -414,20 +396,217 @@ int qemu_paio_error(struct qemu_paiocb *aiocb)
return ret; return ret;
} }
int qemu_paio_cancel(int fd, struct qemu_paiocb *aiocb) static void posix_aio_read(void *opaque)
{ {
PosixAioState *s = opaque;
struct qemu_paiocb *acb, **pacb;
int ret; int ret;
ssize_t len;
/* read all bytes from signal pipe */
for (;;) {
char bytes[16];
len = read(s->rfd, bytes, sizeof(bytes));
if (len == -1 && errno == EINTR)
continue; /* try again */
if (len == sizeof(bytes))
continue; /* more to read */
break;
}
for(;;) {
pacb = &s->first_aio;
for(;;) {
acb = *pacb;
if (!acb)
goto the_end;
ret = qemu_paio_error(acb);
if (ret == ECANCELED) {
/* remove the request */
*pacb = acb->next;
qemu_aio_release(acb);
} else if (ret != EINPROGRESS) {
/* end of aio */
if (ret == 0) {
ret = qemu_paio_return(acb);
if (ret == acb->aio_nbytes)
ret = 0;
else
ret = -EINVAL;
} else {
ret = -ret;
}
/* remove the request */
*pacb = acb->next;
/* call the callback */
acb->common.cb(acb->common.opaque, ret);
qemu_aio_release(acb);
break;
} else {
pacb = &acb->next;
}
}
}
the_end: ;
}
static int posix_aio_flush(void *opaque)
{
PosixAioState *s = opaque;
return !!s->first_aio;
}
static PosixAioState *posix_aio_state;
static void aio_signal_handler(int signum)
{
if (posix_aio_state) {
char byte = 0;
write(posix_aio_state->wfd, &byte, sizeof(byte));
}
qemu_service_io();
}
static void paio_remove(struct qemu_paiocb *acb)
{
struct qemu_paiocb **pacb;
/* remove the callback from the queue */
pacb = &posix_aio_state->first_aio;
for(;;) {
if (*pacb == NULL) {
fprintf(stderr, "paio_remove: aio request not found!\n");
break;
} else if (*pacb == acb) {
*pacb = acb->next;
qemu_aio_release(acb);
break;
}
pacb = &(*pacb)->next;
}
}
static void paio_cancel(BlockDriverAIOCB *blockacb)
{
struct qemu_paiocb *acb = (struct qemu_paiocb *)blockacb;
int active = 0;
mutex_lock(&lock); mutex_lock(&lock);
if (!aiocb->active) { if (!acb->active) {
TAILQ_REMOVE(&request_list, aiocb, node); TAILQ_REMOVE(&request_list, acb, node);
aiocb->ret = -ECANCELED; acb->ret = -ECANCELED;
ret = QEMU_PAIO_CANCELED; } else if (acb->ret == -EINPROGRESS) {
} else if (aiocb->ret == -EINPROGRESS) active = 1;
ret = QEMU_PAIO_NOTCANCELED; }
else
ret = QEMU_PAIO_ALLDONE;
mutex_unlock(&lock); mutex_unlock(&lock);
return ret; if (active) {
/* fail safe: if the aio could not be canceled, we wait for
it */
while (qemu_paio_error(acb) == EINPROGRESS)
;
}
paio_remove(acb);
}
static AIOPool raw_aio_pool = {
.aiocb_size = sizeof(struct qemu_paiocb),
.cancel = paio_cancel,
};
BlockDriverAIOCB *paio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type)
{
struct qemu_paiocb *acb;
acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
if (!acb)
return NULL;
acb->aio_type = type;
acb->aio_fildes = fd;
acb->ev_signo = SIGUSR2;
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
acb->aio_nbytes = nb_sectors * 512;
acb->aio_offset = sector_num * 512;
acb->next = posix_aio_state->first_aio;
posix_aio_state->first_aio = acb;
qemu_paio_submit(acb);
return &acb->common;
}
BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
unsigned long int req, void *buf,
BlockDriverCompletionFunc *cb, void *opaque)
{
struct qemu_paiocb *acb;
acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
if (!acb)
return NULL;
acb->aio_type = QEMU_AIO_IOCTL;
acb->aio_fildes = fd;
acb->ev_signo = SIGUSR2;
acb->aio_offset = 0;
acb->aio_ioctl_buf = buf;
acb->aio_ioctl_cmd = req;
acb->next = posix_aio_state->first_aio;
posix_aio_state->first_aio = acb;
qemu_paio_submit(acb);
return &acb->common;
}
void *paio_init(void)
{
struct sigaction act;
PosixAioState *s;
int fds[2];
int ret;
if (posix_aio_state)
return posix_aio_state;
s = qemu_malloc(sizeof(PosixAioState));
sigfillset(&act.sa_mask);
act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
act.sa_handler = aio_signal_handler;
sigaction(SIGUSR2, &act, NULL);
s->first_aio = NULL;
if (pipe(fds) == -1) {
fprintf(stderr, "failed to create pipe\n");
return NULL;
}
s->rfd = fds[0];
s->wfd = fds[1];
fcntl(s->rfd, F_SETFL, O_NONBLOCK);
fcntl(s->wfd, F_SETFL, O_NONBLOCK);
qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
ret = pthread_attr_init(&attr);
if (ret)
die2(ret, "pthread_attr_init");
ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
if (ret)
die2(ret, "pthread_attr_setdetachstate");
TAILQ_INIT(&request_list);
posix_aio_state = s;
return posix_aio_state;
} }

View File

@ -1,68 +0,0 @@
/*
* QEMU posix-aio emulation
*
* Copyright IBM, Corp. 2008
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
#ifndef QEMU_POSIX_AIO_COMPAT_H
#define QEMU_POSIX_AIO_COMPAT_H
#include <sys/types.h>
#include <unistd.h>
#include <signal.h>
#include "sys-queue.h"
#define QEMU_PAIO_CANCELED 0x01
#define QEMU_PAIO_NOTCANCELED 0x02
#define QEMU_PAIO_ALLDONE 0x03
struct qemu_paiocb
{
int aio_fildes;
union {
struct iovec *aio_iov;
void *aio_ioctl_buf;
};
int aio_niov;
size_t aio_nbytes;
#define aio_ioctl_cmd aio_nbytes /* for QEMU_PAIO_IOCTL */
int ev_signo;
off_t aio_offset;
unsigned aio_flags;
/* 512 byte alignment required for buffer, offset and length */
#define QEMU_AIO_SECTOR_ALIGNED 0x01
/* private */
TAILQ_ENTRY(qemu_paiocb) node;
int aio_type;
#define QEMU_PAIO_READ 0x01
#define QEMU_PAIO_WRITE 0x02
#define QEMU_PAIO_IOCTL 0x03
ssize_t ret;
int active;
};
struct qemu_paioinit
{
unsigned int aio_threads;
unsigned int aio_num;
unsigned int aio_idle_time;
};
int qemu_paio_init(struct qemu_paioinit *aioinit);
int qemu_paio_read(struct qemu_paiocb *aiocb);
int qemu_paio_write(struct qemu_paiocb *aiocb);
int qemu_paio_ioctl(struct qemu_paiocb *aiocb);
int qemu_paio_error(struct qemu_paiocb *aiocb);
ssize_t qemu_paio_return(struct qemu_paiocb *aiocb);
int qemu_paio_cancel(int fd, struct qemu_paiocb *aiocb);
#endif