2013-12-01 22:23:41 +01:00
|
|
|
/*
 * QEMU Block driver for NBD
 *
 * Copyright (C) 2016 Red Hat, Inc.
 * Copyright (C) 2008 Bull S.A.S.
 * Author: Laurent Vivier <Laurent.Vivier@bull.net>
 *
 * Some parts:
 * Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
|
|
|
|
|
2016-01-18 19:01:42 +01:00
|
|
|
#include "qemu/osdep.h"
|
2017-05-26 13:09:13 +02:00
|
|
|
#include "qapi/error.h"
|
2013-12-01 22:23:41 +01:00
|
|
|
#include "nbd-client.h"
|
|
|
|
|
|
|
|
/*
 * A request handle is the recv_coroutine[] slot index XORed with the
 * address passed as the first argument, so the same XOR converts in both
 * directions.  Every macro argument is parenthesized so that an arbitrary
 * expression (e.g. "p + 1") is converted as a whole.
 */
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)(bs)))
#define INDEX_TO_HANDLE(bs, index)  ((index) ^ ((uint64_t)(intptr_t)(bs)))
|
|
|
|
|
2017-03-14 12:11:56 +01:00
|
|
|
/*
 * Wake every request coroutine that currently occupies a slot in
 * s->recv_coroutine[], in slot order.
 */
static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
{
    int slot = 0;

    while (slot < MAX_NBD_REQUESTS) {
        if (s->recv_coroutine[slot] != NULL) {
            aio_co_wake(s->recv_coroutine[slot]);
        }
        slot++;
    }
}
|
|
|
|
|
2015-02-06 22:06:16 +01:00
|
|
|
/*
 * Shut down the connection of @bs and drop the references held on its
 * channels.  Does nothing when the I/O channel was already released.
 */
static void nbd_teardown_connection(BlockDriverState *bs)
{
    NBDClientSession *session = nbd_get_client_session(bs);

    if (session->ioc == NULL) {
        /* Already closed */
        return;
    }

    /*
     * Finish any pending coroutines: shut the channel down in both
     * directions, then keep polling while read_reply_co is still set.
     */
    qio_channel_shutdown(session->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
    BDRV_POLL_WHILE(bs, session->read_reply_co);

    nbd_client_detach_aio_context(bs);

    /* Release both channel references and forget the pointers. */
    object_unref(OBJECT(session->sioc));
    session->sioc = NULL;
    object_unref(OBJECT(session->ioc));
    session->ioc = NULL;
}
|
|
|
|
|
2017-02-13 14:52:24 +01:00
|
|
|
static coroutine_fn void nbd_read_reply_entry(void *opaque)
|
2013-12-01 22:23:41 +01:00
|
|
|
{
|
2017-02-13 14:52:24 +01:00
|
|
|
NBDClientSession *s = opaque;
|
2013-12-01 22:23:41 +01:00
|
|
|
uint64_t i;
|
|
|
|
int ret;
|
2017-05-26 13:09:13 +02:00
|
|
|
Error *local_err = NULL;
|
2013-12-01 22:23:41 +01:00
|
|
|
|
2017-02-13 14:52:24 +01:00
|
|
|
for (;;) {
|
|
|
|
assert(s->reply.handle == 0);
|
2017-05-26 13:09:13 +02:00
|
|
|
ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
|
|
|
|
if (ret < 0) {
|
|
|
|
error_report_err(local_err);
|
|
|
|
}
|
2017-03-14 12:11:56 +01:00
|
|
|
if (ret <= 0) {
|
2017-02-13 14:52:24 +01:00
|
|
|
break;
|
2013-12-01 22:23:41 +01:00
|
|
|
}
|
|
|
|
|
2017-02-13 14:52:24 +01:00
|
|
|
/* There's no need for a mutex on the receive side, because the
|
|
|
|
* handler acts as a synchronization point and ensures that only
|
|
|
|
* one coroutine is called until the reply finishes.
|
|
|
|
*/
|
|
|
|
i = HANDLE_TO_INDEX(s, s->reply.handle);
|
|
|
|
if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
|
|
|
|
break;
|
|
|
|
}
|
2013-12-01 22:23:41 +01:00
|
|
|
|
2017-02-13 14:52:24 +01:00
|
|
|
/* We're woken up by the recv_coroutine itself. Note that there
|
|
|
|
* is no race between yielding and reentering read_reply_co. This
|
|
|
|
* is because:
|
|
|
|
*
|
|
|
|
* - if recv_coroutine[i] runs on the same AioContext, it is only
|
|
|
|
* entered after we yield
|
|
|
|
*
|
|
|
|
* - if recv_coroutine[i] runs on a different AioContext, reentering
|
|
|
|
* read_reply_co happens through a bottom half, which can only
|
|
|
|
* run after we yield.
|
|
|
|
*/
|
|
|
|
aio_co_wake(s->recv_coroutine[i]);
|
|
|
|
qemu_coroutine_yield();
|
2013-12-01 22:23:41 +01:00
|
|
|
}
|
2017-03-14 12:11:56 +01:00
|
|
|
|
|
|
|
nbd_recv_coroutines_enter_all(s);
|
2017-02-13 14:52:24 +01:00
|
|
|
s->read_reply_co = NULL;
|
2013-12-01 22:23:41 +01:00
|
|
|
}
|
|
|
|
|
2015-02-06 22:06:16 +01:00
|
|
|
static int nbd_co_send_request(BlockDriverState *bs,
|
2016-10-14 20:33:07 +02:00
|
|
|
NBDRequest *request,
|
2016-07-15 20:32:03 +02:00
|
|
|
QEMUIOVector *qiov)
|
2013-12-01 22:23:41 +01:00
|
|
|
{
|
2016-10-14 20:33:06 +02:00
|
|
|
NBDClientSession *s = nbd_get_client_session(bs);
|
nbd: fix the co_queue multi-adding bug
When we tested the VM migartion between different hosts with NBD
devices, we found if we sent a cancel command after the drive_mirror
was just started, a coroutine re-enter error would occur. The stack
was as follow:
(gdb) bt
00) 0x00007fdfc744d885 in raise () from /lib64/libc.so.6
01) 0x00007fdfc744ee61 in abort () from /lib64/libc.so.6
02) 0x00007fdfca467cc5 in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:118
03) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedb400) at
qemu-coroutine-lock.c:59
04) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedb400) at qemu-coroutine.c:96
05) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:123
06) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedbdc0) at
qemu-coroutine-lock.c:59
07) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedbdc0) at qemu-coroutine.c:96
08) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedbdc0, opaque=0x0)
at qemu-coroutine.c:123
09) 0x00007fdfca4a1fa4 in nbd_recv_coroutines_enter_all (s=0x7fdfcaef7dd0) at
block/nbd-client.c:41
10) 0x00007fdfca4a1ff9 in nbd_teardown_connection (client=0x7fdfcaef7dd0) at
block/nbd-client.c:50
11) 0x00007fdfca4a20f0 in nbd_reply_ready (opaque=0x7fdfcaef7dd0) at
block/nbd-client.c:92
12) 0x00007fdfca45ed80 in aio_dispatch (ctx=0x7fdfcae15e90) at aio-posix.c:144
13) 0x00007fdfca45ef1b in aio_poll (ctx=0x7fdfcae15e90, blocking=false) at
aio-posix.c:222
14) 0x00007fdfca448c34 in aio_ctx_dispatch (source=0x7fdfcae15e90, callback=0x0,
user_data=0x0) at async.c:212
15) 0x00007fdfc8f2f69a in g_main_context_dispatch () from
/usr/lib64/libglib-2.0.so.0
16) 0x00007fdfca45c391 in glib_pollfds_poll () at main-loop.c:190
17) 0x00007fdfca45c489 in os_host_main_loop_wait (timeout=1483677098) at
main-loop.c:235
18) 0x00007fdfca45c57b in main_loop_wait (nonblocking=0) at main-loop.c:484
19) 0x00007fdfca25f403 in main_loop () at vl.c:2249
20) 0x00007fdfca266fc2 in main (argc=42, argv=0x7ffff517d638,
envp=0x7ffff517d790) at vl.c:4814
We find the nbd_recv_coroutines_enter_all function (triggered by a cancel
command or a network connection breaking down) will enter a coroutine which
is waiting for the sending lock. If the lock is still held by another coroutine,
the entering coroutine will be added into the co_queue again. Latter, when the
lock is released, a coroutine re-enter error will occur.
This bug can be fixed simply by delaying the setting of recv_coroutine as
suggested by paolo. After applying this patch, we have tested the cancel
operation in mirror phase looply for more than 5 hous and everything is fine.
Without this patch, a coroutine re-enter error will occur in 5 minutes.
Signed-off-by: Bn Wu <wu.wubin@huawei.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1423552846-3896-1-git-send-email-wu.wubin@huawei.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-02-10 08:20:46 +01:00
|
|
|
int rc, ret, i;
|
2013-12-01 22:23:41 +01:00
|
|
|
|
|
|
|
qemu_co_mutex_lock(&s->send_mutex);
|
2017-06-01 12:44:56 +02:00
|
|
|
while (s->in_flight == MAX_NBD_REQUESTS) {
|
|
|
|
qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
|
|
|
|
}
|
|
|
|
s->in_flight++;
|
nbd: fix the co_queue multi-adding bug
When we tested the VM migartion between different hosts with NBD
devices, we found if we sent a cancel command after the drive_mirror
was just started, a coroutine re-enter error would occur. The stack
was as follow:
(gdb) bt
00) 0x00007fdfc744d885 in raise () from /lib64/libc.so.6
01) 0x00007fdfc744ee61 in abort () from /lib64/libc.so.6
02) 0x00007fdfca467cc5 in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:118
03) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedb400) at
qemu-coroutine-lock.c:59
04) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedb400) at qemu-coroutine.c:96
05) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:123
06) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedbdc0) at
qemu-coroutine-lock.c:59
07) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedbdc0) at qemu-coroutine.c:96
08) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedbdc0, opaque=0x0)
at qemu-coroutine.c:123
09) 0x00007fdfca4a1fa4 in nbd_recv_coroutines_enter_all (s=0x7fdfcaef7dd0) at
block/nbd-client.c:41
10) 0x00007fdfca4a1ff9 in nbd_teardown_connection (client=0x7fdfcaef7dd0) at
block/nbd-client.c:50
11) 0x00007fdfca4a20f0 in nbd_reply_ready (opaque=0x7fdfcaef7dd0) at
block/nbd-client.c:92
12) 0x00007fdfca45ed80 in aio_dispatch (ctx=0x7fdfcae15e90) at aio-posix.c:144
13) 0x00007fdfca45ef1b in aio_poll (ctx=0x7fdfcae15e90, blocking=false) at
aio-posix.c:222
14) 0x00007fdfca448c34 in aio_ctx_dispatch (source=0x7fdfcae15e90, callback=0x0,
user_data=0x0) at async.c:212
15) 0x00007fdfc8f2f69a in g_main_context_dispatch () from
/usr/lib64/libglib-2.0.so.0
16) 0x00007fdfca45c391 in glib_pollfds_poll () at main-loop.c:190
17) 0x00007fdfca45c489 in os_host_main_loop_wait (timeout=1483677098) at
main-loop.c:235
18) 0x00007fdfca45c57b in main_loop_wait (nonblocking=0) at main-loop.c:484
19) 0x00007fdfca25f403 in main_loop () at vl.c:2249
20) 0x00007fdfca266fc2 in main (argc=42, argv=0x7ffff517d638,
envp=0x7ffff517d790) at vl.c:4814
We find the nbd_recv_coroutines_enter_all function (triggered by a cancel
command or a network connection breaking down) will enter a coroutine which
is waiting for the sending lock. If the lock is still held by another coroutine,
the entering coroutine will be added into the co_queue again. Latter, when the
lock is released, a coroutine re-enter error will occur.
This bug can be fixed simply by delaying the setting of recv_coroutine as
suggested by paolo. After applying this patch, we have tested the cancel
operation in mirror phase looply for more than 5 hous and everything is fine.
Without this patch, a coroutine re-enter error will occur in 5 minutes.
Signed-off-by: Bn Wu <wu.wubin@huawei.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1423552846-3896-1-git-send-email-wu.wubin@huawei.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-02-10 08:20:46 +01:00
|
|
|
|
|
|
|
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
|
|
|
|
if (s->recv_coroutine[i] == NULL) {
|
|
|
|
s->recv_coroutine[i] = qemu_coroutine_self();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-10 19:41:04 +01:00
|
|
|
g_assert(qemu_in_coroutine());
|
nbd: fix the co_queue multi-adding bug
When we tested the VM migartion between different hosts with NBD
devices, we found if we sent a cancel command after the drive_mirror
was just started, a coroutine re-enter error would occur. The stack
was as follow:
(gdb) bt
00) 0x00007fdfc744d885 in raise () from /lib64/libc.so.6
01) 0x00007fdfc744ee61 in abort () from /lib64/libc.so.6
02) 0x00007fdfca467cc5 in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:118
03) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedb400) at
qemu-coroutine-lock.c:59
04) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedb400) at qemu-coroutine.c:96
05) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedb400, opaque=0x0)
at qemu-coroutine.c:123
06) 0x00007fdfca467f6c in qemu_co_queue_run_restart (co=0x7fdfcaedbdc0) at
qemu-coroutine-lock.c:59
07) 0x00007fdfca467be5 in coroutine_swap (from=0x7fdfcaf3c4e8,
to=0x7fdfcaedbdc0) at qemu-coroutine.c:96
08) 0x00007fdfca467cea in qemu_coroutine_enter (co=0x7fdfcaedbdc0, opaque=0x0)
at qemu-coroutine.c:123
09) 0x00007fdfca4a1fa4 in nbd_recv_coroutines_enter_all (s=0x7fdfcaef7dd0) at
block/nbd-client.c:41
10) 0x00007fdfca4a1ff9 in nbd_teardown_connection (client=0x7fdfcaef7dd0) at
block/nbd-client.c:50
11) 0x00007fdfca4a20f0 in nbd_reply_ready (opaque=0x7fdfcaef7dd0) at
block/nbd-client.c:92
12) 0x00007fdfca45ed80 in aio_dispatch (ctx=0x7fdfcae15e90) at aio-posix.c:144
13) 0x00007fdfca45ef1b in aio_poll (ctx=0x7fdfcae15e90, blocking=false) at
aio-posix.c:222
14) 0x00007fdfca448c34 in aio_ctx_dispatch (source=0x7fdfcae15e90, callback=0x0,
user_data=0x0) at async.c:212
15) 0x00007fdfc8f2f69a in g_main_context_dispatch () from
/usr/lib64/libglib-2.0.so.0
16) 0x00007fdfca45c391 in glib_pollfds_poll () at main-loop.c:190
17) 0x00007fdfca45c489 in os_host_main_loop_wait (timeout=1483677098) at
main-loop.c:235
18) 0x00007fdfca45c57b in main_loop_wait (nonblocking=0) at main-loop.c:484
19) 0x00007fdfca25f403 in main_loop () at vl.c:2249
20) 0x00007fdfca266fc2 in main (argc=42, argv=0x7ffff517d638,
envp=0x7ffff517d790) at vl.c:4814
We find the nbd_recv_coroutines_enter_all function (triggered by a cancel
command or a network connection breaking down) will enter a coroutine which
is waiting for the sending lock. If the lock is still held by another coroutine,
the entering coroutine will be added into the co_queue again. Latter, when the
lock is released, a coroutine re-enter error will occur.
This bug can be fixed simply by delaying the setting of recv_coroutine as
suggested by paolo. After applying this patch, we have tested the cancel
operation in mirror phase looply for more than 5 hous and everything is fine.
Without this patch, a coroutine re-enter error will occur in 5 minutes.
Signed-off-by: Bn Wu <wu.wubin@huawei.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 1423552846-3896-1-git-send-email-wu.wubin@huawei.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-02-10 08:20:46 +01:00
|
|
|
assert(i < MAX_NBD_REQUESTS);
|
|
|
|
request->handle = INDEX_TO_HANDLE(s, i);
|
2016-02-10 19:41:01 +01:00
|
|
|
|
|
|
|
if (!s->ioc) {
|
|
|
|
qemu_co_mutex_unlock(&s->send_mutex);
|
|
|
|
return -EPIPE;
|
|
|
|
}
|
|
|
|
|
2013-12-01 22:23:41 +01:00
|
|
|
if (qiov) {
|
2016-02-10 19:41:01 +01:00
|
|
|
qio_channel_set_cork(s->ioc, true);
|
2016-02-10 19:41:04 +01:00
|
|
|
rc = nbd_send_request(s->ioc, request);
|
2013-12-01 22:23:41 +01:00
|
|
|
if (rc >= 0) {
|
2017-06-02 17:01:39 +02:00
|
|
|
ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
|
|
|
|
NULL);
|
2013-12-01 22:23:41 +01:00
|
|
|
if (ret != request->len) {
|
|
|
|
rc = -EIO;
|
|
|
|
}
|
|
|
|
}
|
2016-02-10 19:41:01 +01:00
|
|
|
qio_channel_set_cork(s->ioc, false);
|
2013-12-01 22:23:41 +01:00
|
|
|
} else {
|
2016-02-10 19:41:04 +01:00
|
|
|
rc = nbd_send_request(s->ioc, request);
|
2013-12-01 22:23:41 +01:00
|
|
|
}
|
|
|
|
qemu_co_mutex_unlock(&s->send_mutex);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2016-10-14 20:33:06 +02:00
|
|
|
static void nbd_co_receive_reply(NBDClientSession *s,
|
2016-10-14 20:33:07 +02:00
|
|
|
NBDRequest *request,
|
|
|
|
NBDReply *reply,
|
2016-07-15 20:32:03 +02:00
|
|
|
QEMUIOVector *qiov)
|
2013-12-01 22:23:41 +01:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
2017-02-13 14:52:24 +01:00
|
|
|
/* Wait until we're woken up by nbd_read_reply_entry. */
|
2013-12-01 22:23:41 +01:00
|
|
|
qemu_coroutine_yield();
|
|
|
|
*reply = s->reply;
|
2016-02-10 19:41:01 +01:00
|
|
|
if (reply->handle != request->handle ||
|
|
|
|
!s->ioc) {
|
2013-12-01 22:23:41 +01:00
|
|
|
reply->error = EIO;
|
|
|
|
} else {
|
|
|
|
if (qiov && reply->error == 0) {
|
2017-06-02 17:01:39 +02:00
|
|
|
ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
|
|
|
|
NULL);
|
2013-12-01 22:23:41 +01:00
|
|
|
if (ret != request->len) {
|
|
|
|
reply->error = EIO;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Tell the read handler to read another header. */
|
|
|
|
s->reply.handle = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-02-13 14:52:24 +01:00
|
|
|
static void nbd_coroutine_end(BlockDriverState *bs,
|
2016-10-14 20:33:07 +02:00
|
|
|
NBDRequest *request)
|
2013-12-01 22:23:41 +01:00
|
|
|
{
|
2017-02-13 14:52:24 +01:00
|
|
|
NBDClientSession *s = nbd_get_client_session(bs);
|
2013-12-01 22:23:41 +01:00
|
|
|
int i = HANDLE_TO_INDEX(s, request->handle);
|
2017-02-13 14:52:24 +01:00
|
|
|
|
2013-12-01 22:23:41 +01:00
|
|
|
s->recv_coroutine[i] = NULL;
|
2017-02-13 14:52:24 +01:00
|
|
|
|
|
|
|
/* Kick the read_reply_co to get the next reply. */
|
|
|
|
if (s->read_reply_co) {
|
|
|
|
aio_co_wake(s->read_reply_co);
|
2013-12-01 22:23:41 +01:00
|
|
|
}
|
2017-06-01 12:44:56 +02:00
|
|
|
|
|
|
|
qemu_co_mutex_lock(&s->send_mutex);
|
|
|
|
s->in_flight--;
|
|
|
|
qemu_co_queue_next(&s->free_sema);
|
|
|
|
qemu_co_mutex_unlock(&s->send_mutex);
|
2013-12-01 22:23:41 +01:00
|
|
|
}
|
|
|
|
|
2016-07-16 01:23:07 +02:00
|
|
|
/*
 * Read @bytes bytes starting at @offset into @qiov with NBD_CMD_READ.
 * @bytes must not exceed NBD_MAX_BUFFER_SIZE and @flags must be zero.
 *
 * Returns the negated error recorded for the request (0 when none).
 */
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags)
{
    NBDClientSession *session = nbd_get_client_session(bs);
    NBDRequest req = {
        .type = NBD_CMD_READ,
        .from = offset,
        .len = bytes,
    };
    NBDReply rep;
    ssize_t rc;

    assert(bytes <= NBD_MAX_BUFFER_SIZE);
    assert(!flags);

    rc = nbd_co_send_request(bs, &req, NULL);
    if (rc >= 0) {
        nbd_co_receive_reply(session, &req, &rep, qiov);
    } else {
        rep.error = -rc;
    }

    /* The request slot must be released whether or not sending worked. */
    nbd_coroutine_end(bs, &req);
    return -rep.error;
}
|
|
|
|
|
2016-07-16 01:23:07 +02:00
|
|
|
/*
 * Write @bytes bytes from @qiov at @offset with NBD_CMD_WRITE.
 * BDRV_REQ_FUA in @flags is forwarded as NBD_CMD_FLAG_FUA and requires
 * that the server advertised NBD_FLAG_SEND_FUA.  @bytes must not exceed
 * NBD_MAX_BUFFER_SIZE.
 *
 * Returns the negated error recorded for the request (0 when none).
 */
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags)
{
    NBDClientSession *session = nbd_get_client_session(bs);
    NBDRequest req = {
        .type = NBD_CMD_WRITE,
        .from = offset,
        .len = bytes,
    };
    NBDReply rep;
    ssize_t rc;

    if (flags & BDRV_REQ_FUA) {
        assert(session->info.flags & NBD_FLAG_SEND_FUA);
        req.flags |= NBD_CMD_FLAG_FUA;
    }

    assert(bytes <= NBD_MAX_BUFFER_SIZE);

    rc = nbd_co_send_request(bs, &req, qiov);
    if (rc >= 0) {
        nbd_co_receive_reply(session, &req, &rep, NULL);
    } else {
        rep.error = -rc;
    }

    /* The request slot must be released whether or not sending worked. */
    nbd_coroutine_end(bs, &req);
    return -rep.error;
}
|
|
|
|
|
2016-10-14 20:33:18 +02:00
|
|
|
/*
 * Zero @bytes bytes at @offset with NBD_CMD_WRITE_ZEROES.
 *
 * Returns -ENOTSUP when the server did not advertise
 * NBD_FLAG_SEND_WRITE_ZEROES.  BDRV_REQ_FUA is forwarded as
 * NBD_CMD_FLAG_FUA (the server must have advertised NBD_FLAG_SEND_FUA);
 * NBD_CMD_FLAG_NO_HOLE is set unless BDRV_REQ_MAY_UNMAP was passed.
 * Otherwise returns the negated error recorded for the request
 * (0 when none).
 */
int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                                int bytes, BdrvRequestFlags flags)
{
    NBDClientSession *session = nbd_get_client_session(bs);
    NBDRequest req = {
        .type = NBD_CMD_WRITE_ZEROES,
        .from = offset,
        .len = bytes,
    };
    NBDReply rep;
    ssize_t rc;

    if ((session->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) == 0) {
        return -ENOTSUP;
    }

    if (flags & BDRV_REQ_FUA) {
        assert(session->info.flags & NBD_FLAG_SEND_FUA);
        req.flags |= NBD_CMD_FLAG_FUA;
    }
    if ((flags & BDRV_REQ_MAY_UNMAP) == 0) {
        req.flags |= NBD_CMD_FLAG_NO_HOLE;
    }

    rc = nbd_co_send_request(bs, &req, NULL);
    if (rc >= 0) {
        nbd_co_receive_reply(session, &req, &rep, NULL);
    } else {
        rep.error = -rc;
    }

    /* The request slot must be released whether or not sending worked. */
    nbd_coroutine_end(bs, &req);
    return -rep.error;
}
|
|
|
|
|
2015-02-06 22:06:16 +01:00
|
|
|
/*
 * Send NBD_CMD_FLUSH to the server.
 *
 * Returns 0 without sending anything when the server did not advertise
 * NBD_FLAG_SEND_FLUSH; otherwise returns the negated error recorded for
 * the request (0 when none).
 */
int nbd_client_co_flush(BlockDriverState *bs)
{
    NBDClientSession *session = nbd_get_client_session(bs);
    NBDRequest req = { .type = NBD_CMD_FLUSH };
    NBDReply rep;
    ssize_t rc;

    if ((session->info.flags & NBD_FLAG_SEND_FLUSH) == 0) {
        return 0;
    }

    /* A flush carries neither an offset nor a length. */
    req.from = 0;
    req.len = 0;

    rc = nbd_co_send_request(bs, &req, NULL);
    if (rc >= 0) {
        nbd_co_receive_reply(session, &req, &rep, NULL);
    } else {
        rep.error = -rc;
    }

    /* The request slot must be released whether or not sending worked. */
    nbd_coroutine_end(bs, &req);
    return -rep.error;
}
|
|
|
|
|
2017-06-09 12:18:08 +02:00
|
|
|
/*
 * Discard @bytes bytes at @offset with NBD_CMD_TRIM.
 *
 * Returns 0 without sending anything when the server did not advertise
 * NBD_FLAG_SEND_TRIM; otherwise returns the negated error recorded for
 * the request (0 when none).
 */
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
{
    NBDClientSession *session = nbd_get_client_session(bs);
    NBDRequest req = {
        .type = NBD_CMD_TRIM,
        .from = offset,
        .len = bytes,
    };
    NBDReply rep;
    ssize_t rc;

    if ((session->info.flags & NBD_FLAG_SEND_TRIM) == 0) {
        return 0;
    }

    rc = nbd_co_send_request(bs, &req, NULL);
    if (rc >= 0) {
        nbd_co_receive_reply(session, &req, &rep, NULL);
    } else {
        rep.error = -rc;
    }

    /* The request slot must be released whether or not sending worked. */
    nbd_coroutine_end(bs, &req);
    return -rep.error;
}
|
|
|
|
|
2015-02-06 22:06:16 +01:00
|
|
|
/* Detach the I/O channel of @bs's NBD session from its AioContext. */
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
    qio_channel_detach_aio_context(
        QIO_CHANNEL(nbd_get_client_session(bs)->ioc));
}
|
|
|
|
|
2015-02-06 22:06:16 +01:00
|
|
|
void nbd_client_attach_aio_context(BlockDriverState *bs,
|
|
|
|
AioContext *new_context)
|
2014-05-08 16:34:43 +02:00
|
|
|
{
|
2017-02-13 14:52:24 +01:00
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
2017-06-15 19:09:05 +02:00
|
|
|
qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);
|
2017-02-13 14:52:24 +01:00
|
|
|
aio_co_schedule(new_context, client->read_reply_co);
|
2014-05-08 16:34:43 +02:00
|
|
|
}
|
|
|
|
|
2015-02-06 22:06:16 +01:00
|
|
|
/*
 * Close the NBD session of @bs: send NBD_CMD_DISC to the server and tear
 * the connection down.  Does nothing when the channel is already gone.
 */
void nbd_client_close(BlockDriverState *bs)
{
    NBDClientSession *session = nbd_get_client_session(bs);
    NBDRequest disc = { .type = NBD_CMD_DISC };

    if (!session->ioc) {
        return;
    }

    /* The result of the disconnect request is not checked. */
    nbd_send_request(session->ioc, &disc);

    nbd_teardown_connection(bs);
}
|
|
|
|
|
2016-02-10 19:41:12 +01:00
|
|
|
int nbd_client_init(BlockDriverState *bs,
|
|
|
|
QIOChannelSocket *sioc,
|
|
|
|
const char *export,
|
|
|
|
QCryptoTLSCreds *tlscreds,
|
|
|
|
const char *hostname,
|
|
|
|
Error **errp)
|
2013-12-01 22:23:41 +01:00
|
|
|
{
|
2016-10-14 20:33:06 +02:00
|
|
|
NBDClientSession *client = nbd_get_client_session(bs);
|
2013-12-01 22:23:41 +01:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
/* NBD handshake */
|
2013-12-01 22:23:43 +01:00
|
|
|
logout("session init %s\n", export);
|
2016-02-10 19:41:01 +01:00
|
|
|
qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
|
|
|
|
|
nbd: Implement NBD_INFO_BLOCK_SIZE on client
The upstream NBD Protocol has defined a new extension to allow
the server to advertise block sizes to the client, as well as
a way for the client to inform the server whether it intends to
obey block sizes.
When using the block layer as the client, we will obey block
sizes; but when used as 'qemu-nbd -c' to hand off to the
kernel nbd module as the client, we are still waiting for the
kernel to implement a way for us to learn if it will honor
block sizes (perhaps by an addition to sysfs, rather than an
ioctl), as well as any way to tell the kernel what additional
block sizes to obey (NBD_SET_BLKSIZE appears to be accurate
for the minimum size, but preferred and maximum sizes would
probably be new ioctl()s), so until then, we need to make our
request for block sizes conditional.
When using ioctl(NBD_SET_BLKSIZE) to hand off to the kernel,
use the minimum block size as the sector size if it is larger
than 512, which also has the nice effect of cooperating with
(non-qemu) servers that don't do read-modify-write when
exposing a block device with 4k sectors; it might also allow
us to visit a file larger than 2T on a 32-bit kernel.
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-10-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-07 22:30:49 +02:00
|
|
|
client->info.request_sizes = true;
|
2016-02-10 19:41:04 +01:00
|
|
|
ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
|
2016-02-10 19:41:12 +01:00
|
|
|
tlscreds, hostname,
|
2017-07-07 22:30:41 +02:00
|
|
|
&client->ioc, &client->info, errp);
|
2013-12-01 22:23:41 +01:00
|
|
|
if (ret < 0) {
|
|
|
|
logout("Failed to negotiate with the NBD server\n");
|
|
|
|
return ret;
|
|
|
|
}
|
2017-07-07 22:30:41 +02:00
|
|
|
if (client->info.flags & NBD_FLAG_SEND_FUA) {
|
2016-05-04 00:39:06 +02:00
|
|
|
bs->supported_write_flags = BDRV_REQ_FUA;
|
2016-11-17 21:13:54 +01:00
|
|
|
bs->supported_zero_flags |= BDRV_REQ_FUA;
|
|
|
|
}
|
2017-07-07 22:30:41 +02:00
|
|
|
if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
|
2016-11-17 21:13:54 +01:00
|
|
|
bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
|
2016-05-04 00:39:06 +02:00
|
|
|
}
|
nbd: Implement NBD_INFO_BLOCK_SIZE on client
The upstream NBD Protocol has defined a new extension to allow
the server to advertise block sizes to the client, as well as
a way for the client to inform the server whether it intends to
obey block sizes.
When using the block layer as the client, we will obey block
sizes; but when used as 'qemu-nbd -c' to hand off to the
kernel nbd module as the client, we are still waiting for the
kernel to implement a way for us to learn if it will honor
block sizes (perhaps by an addition to sysfs, rather than an
ioctl), as well as any way to tell the kernel what additional
block sizes to obey (NBD_SET_BLKSIZE appears to be accurate
for the minimum size, but preferred and maximum sizes would
probably be new ioctl()s), so until then, we need to make our
request for block sizes conditional.
When using ioctl(NBD_SET_BLKSIZE) to hand off to the kernel,
use the minimum block size as the sector size if it is larger
than 512, which also has the nice effect of cooperating with
(non-qemu) servers that don't do read-modify-write when
exposing a block device with 4k sectors; it might also allow
us to visit a file larger than 2T on a 32-bit kernel.
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20170707203049.534-10-eblake@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2017-07-07 22:30:49 +02:00
|
|
|
if (client->info.min_block > bs->bl.request_alignment) {
|
|
|
|
bs->bl.request_alignment = client->info.min_block;
|
|
|
|
}
|
2013-12-01 22:23:41 +01:00
|
|
|
|
|
|
|
qemu_co_mutex_init(&client->send_mutex);
|
nbd: Use CoQueue for free_sema instead of CoMutex
NBD is using the CoMutex in a way that wasn't anticipated. For example, if there are
N(N=26, MAX_NBD_REQUESTS=16) nbd write requests, so we will invoke nbd_client_co_pwritev
N times.
----------------------------------------------------------------------------------------
time request Actions
1 1 in_flight=1, Coroutine=C1
2 2 in_flight=2, Coroutine=C2
...
15 15 in_flight=15, Coroutine=C15
16 16 in_flight=16, Coroutine=C16, free_sema->holder=C16, mutex->locked=true
17 17 in_flight=16, Coroutine=C17, queue C17 into free_sema->queue
18 18 in_flight=16, Coroutine=C18, queue C18 into free_sema->queue
...
26 N in_flight=16, Coroutine=C26, queue C26 into free_sema->queue
----------------------------------------------------------------------------------------
Once nbd client recieves request No.16' reply, we will re-enter C16. It's ok, because
it's equal to 'free_sema->holder'.
----------------------------------------------------------------------------------------
time request Actions
27 16 in_flight=15, Coroutine=C16, free_sema->holder=C16, mutex->locked=false
----------------------------------------------------------------------------------------
Then nbd_coroutine_end invokes qemu_co_mutex_unlock what will pop coroutines from
free_sema->queue's head and enter C17. More free_sema->holder is C17 now.
----------------------------------------------------------------------------------------
time request Actions
28 17 in_flight=16, Coroutine=C17, free_sema->holder=C17, mutex->locked=true
----------------------------------------------------------------------------------------
In above scenario, we only recieves request No.16' reply. As time goes by, nbd client will
almostly recieves replies from requests 1 to 15 rather than request 17 who owns C17. In this
case, we will encounter assert "mutex->holder == self" failed since Kevin's commit 0e438cdc
"coroutine: Let CoMutex remember who holds it". For example, if nbd client recieves request
No.15' reply, qemu will stop unexpectedly:
----------------------------------------------------------------------------------------
time request Actions
29 15(most case) in_flight=15, Coroutine=C15, free_sema->holder=C17, mutex->locked=false
----------------------------------------------------------------------------------------
Per Paolo's suggestion "The simplest fix is to change it to CoQueue, which is like a condition
variable", this patch replaces CoMutex with CoQueue.
Cc: Wen Congyang <wency@cn.fujitsu.com>
Reported-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Message-Id: <1476267508-19499-1-git-send-email-xiecl.fnst@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-10-12 12:18:28 +02:00
|
|
|
qemu_co_queue_init(&client->free_sema);
|
2016-02-10 19:41:01 +01:00
|
|
|
client->sioc = sioc;
|
|
|
|
object_ref(OBJECT(client->sioc));
|
2016-02-10 19:41:11 +01:00
|
|
|
|
|
|
|
if (!client->ioc) {
|
|
|
|
client->ioc = QIO_CHANNEL(sioc);
|
|
|
|
object_ref(OBJECT(client->ioc));
|
|
|
|
}
|
2013-12-01 22:23:41 +01:00
|
|
|
|
|
|
|
/* Now that we're connected, set the socket to be non-blocking and
|
|
|
|
* kick the reply mechanism. */
|
2016-02-10 19:41:01 +01:00
|
|
|
qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
|
2017-02-13 14:52:24 +01:00
|
|
|
client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
|
2015-02-06 22:06:16 +01:00
|
|
|
nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
|
2013-12-01 22:23:41 +01:00
|
|
|
|
|
|
|
logout("Established connection with NBD server\n");
|
|
|
|
return 0;
|
|
|
|
}
|