linux-aio: prevent submitting more than MAX_EVENTS

Invoking io_setup(MAX_EVENTS) asks the kernel to create a completion ring
buffer for the specified number of events.  But the kernel's ring buffer
allocation logic is a bit tricky (the ring buffer is page-size aligned and
some per-cpu allocations are required), so eventually more events than
requested are allocated.
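
For illustration only (not part of this patch), a minimal libaio sketch of
the setup call described above; the error handling and the MAX_EVENTS value
are assumptions:

    #include <libaio.h>
    #include <stdio.h>

    #define MAX_EVENTS 128

    int main(void)
    {
        io_context_t ctx = 0;
        int ret;

        /* Ask the kernel for a completion ring that can hold MAX_EVENTS.
         * The kernel may allocate more internally (page-aligned ring,
         * per-cpu reservations), but userspace should still treat
         * MAX_EVENTS as the negotiated limit. */
        ret = io_setup(MAX_EVENTS, &ctx);
        if (ret < 0) {
            fprintf(stderr, "io_setup: %d\n", ret);
            return 1;
        }

        io_destroy(ctx);
        return 0;
    }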

From the userspace side we have to follow this convention and must not try
to io_submit() more than that; otherwise the logic that consumes completed
events would have to be changed accordingly.  The pitfall is in the
following sequence:

    MAX_EVENTS = 128
    io_setup(MAX_EVENTS)

    io_submit(MAX_EVENTS)
    io_submit(MAX_EVENTS)

    /* now 256 events are in-flight */

    io_getevents(MAX_EVENTS) = 128

    /* we can reap only 128 events at once, so to be sure
     * that nothing is left pending the io_getevents(MAX_EVENTS)
     * call must be invoked once more, otherwise a hang will happen. */

To prevent the hang, or the need to reiterate the io_getevents() call, this
patch restricts the number of in-flight requests, which is now limited to
MAX_EVENTS.
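
As a rough sketch of the accounting idea (the struct and function names
below are illustrative, not the actual QEMU code; see the diff for the real
implementation):

    #include <libaio.h>

    #define MAX_EVENTS 128

    /* Hypothetical queue state mirroring the fields this patch introduces. */
    struct queue_state {
        unsigned int in_queue;   /* prepared iocbs not yet submitted */
        unsigned int in_flight;  /* submitted iocbs whose events are unreaped */
    };

    /* Submit pending iocbs, but never let in_flight exceed MAX_EVENTS,
     * i.e. the value passed to io_setup(). */
    static int submit_capped(io_context_t ctx, struct queue_state *q,
                             struct iocb **pending)
    {
        while (q->in_queue > 0 && q->in_flight < MAX_EVENTS) {
            unsigned int room = MAX_EVENTS - q->in_flight;
            long batch = q->in_queue < room ? q->in_queue : room;

            int ret = io_submit(ctx, batch, pending);
            if (ret < 0) {
                return ret;           /* submission error */
            }
            q->in_flight += ret;      /* count only what the kernel accepted */
            q->in_queue -= ret;
            pending += ret;
            if (ret < batch) {
                break;                /* kernel accepted fewer than asked */
            }
        }
        return 0;
    }

With such a cap in place, one io_getevents(MAX_EVENTS) pass can always
drain everything that is in flight.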

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 1468415004-31755-1-git-send-email-roman.penyaev@profitbricks.com
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: qemu-devel@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Author: Roman Pen, 2016-07-13 15:03:24 +02:00 (committed by Stefan Hajnoczi)
parent 6977d901c4
commit 5e1b34a3fa


@@ -28,8 +28,6 @@
  */
 #define MAX_EVENTS 128
 
-#define MAX_QUEUED_IO 128
-
 struct qemu_laiocb {
     BlockAIOCB common;
     Coroutine *co;
@@ -44,7 +42,8 @@ struct qemu_laiocb {
 
 typedef struct {
     int plugged;
-    unsigned int n;
+    unsigned int in_queue;
+    unsigned int in_flight;
     bool blocked;
     QSIMPLEQ_HEAD(, qemu_laiocb) pending;
 } LaioQueue;
@@ -131,6 +130,7 @@ static void qemu_laio_completion_bh(void *opaque)
             s->event_max = 0;
             return; /* no more events */
         }
+        s->io_q.in_flight -= s->event_max;
     }
 
     /* Reschedule so nested event loops see currently pending completions */
@@ -192,7 +192,8 @@ static void ioq_init(LaioQueue *io_q)
 {
     QSIMPLEQ_INIT(&io_q->pending);
     io_q->plugged = 0;
-    io_q->n = 0;
+    io_q->in_queue = 0;
+    io_q->in_flight = 0;
     io_q->blocked = false;
 }
 
@@ -200,14 +201,17 @@ static void ioq_submit(LinuxAioState *s)
 {
     int ret, len;
     struct qemu_laiocb *aiocb;
-    struct iocb *iocbs[MAX_QUEUED_IO];
+    struct iocb *iocbs[MAX_EVENTS];
     QSIMPLEQ_HEAD(, qemu_laiocb) completed;
 
     do {
+        if (s->io_q.in_flight >= MAX_EVENTS) {
+            break;
+        }
         len = 0;
         QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
             iocbs[len++] = &aiocb->iocb;
-            if (len == MAX_QUEUED_IO) {
+            if (s->io_q.in_flight + len >= MAX_EVENTS) {
                 break;
             }
         }
@@ -220,11 +224,12 @@ static void ioq_submit(LinuxAioState *s)
             abort();
         }
 
-        s->io_q.n -= ret;
+        s->io_q.in_flight += ret;
+        s->io_q.in_queue -= ret;
         aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb);
         QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed);
     } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
-    s->io_q.blocked = (s->io_q.n > 0);
+    s->io_q.blocked = (s->io_q.in_queue > 0);
 }
 
 void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
@@ -264,9 +269,10 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
     io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
 
     QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
-    s->io_q.n++;
+    s->io_q.in_queue++;
     if (!s->io_q.blocked &&
-        (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
+        (!s->io_q.plugged ||
+         s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) {
         ioq_submit(s);
     }