Merge tag 'pull-nbd-2023-10-05' of https://repo.or.cz/qemu/ericb into staging

NBD patches for 2023-10-05

- various: mailmap cleanups
- Eric Blake: enable use of NBD 64-bit extended headers

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCAAdFiEEccLMIrHEYCkn0vOqp6FrSiUnQ2oFAmUfA94ACgkQp6FrSiUn
# Q2o2hAf/Q4q6RoEG9WoOIa6WB1nk9tZN6GRfW4jS+09hJTFVbYUhJOcvQwwZjPT6
# 6oIwde8w7uE+AxBOA3XPbgTOBnTnpt2RH2AYVctNYB2vonuSrx3/KE3XSi4nrJRo
# +XiPbOsDzQu+vYsI7XJ+5e13BE4iZVmzbyke0U6hO88uR6tQstDJV1Mhem4jIbCG
# uLzkVDs3yY5sUFoUe77lFGWgKqfh87eJXV18T76df97ZZ5O2/w9G8MknG/CTSsR0
# fmzzC1Q4a2UEFtX8M3etRQ/b5WbFYhM+XwFevm2YBpod89ejGEA0ohhpn+GpZDU9
# SU66lXL/5jM9N7RkiSFwvupaot9hvw==
# =UieX
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 05 Oct 2023 14:43:42 EDT
# gpg:                using RSA key 71C2CC22B1C4602927D2F3AAA7A16B4A2527436A
# gpg: Good signature from "Eric Blake <eblake@redhat.com>" [full]
# gpg:                 aka "Eric Blake (Free Software Programmer) <ebb9@byu.net>" [full]
# gpg:                 aka "[jpeg image of size 6874]" [full]
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* tag 'pull-nbd-2023-10-05' of https://repo.or.cz/qemu/ericb:
  nbd/server: Add FLAG_PAYLOAD support to CMD_BLOCK_STATUS
  nbd/server: Prepare for per-request filtering of BLOCK_STATUS
  nbd/server: Refactor list of negotiated meta contexts
  nbd/client: Request extended headers during negotiation
  nbd/client: Accept 64-bit block status chunks
  nbd/client: Initial support for extended headers
  nbd/client: Plumb errp through nbd_receive_replies
  nbd/server: Enable initial support for extended headers
  nbd/server: Support 64-bit block status
  nbd/server: Prepare to send extended header replies
  nbd/server: Prepare to receive extended header requests
  nbd/server: Support a request payload
  mailmap: Fix BALATON Zoltan author email
  maint: Tweak comments in mailmap regarding SPF
  mailmap: Fix Andrey Drobyshev author email

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Committed by Stefan Hajnoczi on 2023-10-09 10:09:41 -04:00, commit e068c0b9c7
16 changed files with 537 additions and 160 deletions

View File

@ -40,12 +40,26 @@ Nick Hudson <hnick@vmware.com> hnick@vmware.com <hnick@vmware.com>
# for the cvs2svn initialization commit e63c3dc74bf.
# Next, translate a few commits where mailman rewrote the From: line due
# to strict SPF, although we prefer to avoid adding more entries like that.
# to strict SPF and DMARC. Usually, our build process should be flagging
# commits like these before maintainer merges; if you find the need to add
# a line here, please also report a bug against the part of the build
# process that let the mis-attribution slip through in the first place.
#
# If the mailing list munges your emails, use:
# git config sendemail.from '"Your Name" <your.email@example.com>'
# the use of "" in that line will differ from the typically unquoted
# 'git config user.name', which in turn is sufficient for 'git send-email'
# to add an extra From: line in the body of your email that takes
# precedence over any munged From: in the mail's headers.
# See https://lists.openembedded.org/g/openembedded-core/message/166515
# and https://lists.gnu.org/archive/html/qemu-devel/2023-09/msg06784.html
Ed Swierk <eswierk@skyportsystems.com> Ed Swierk via Qemu-devel <qemu-devel@nongnu.org>
Ian McKellar <ianloic@google.com> Ian McKellar via Qemu-devel <qemu-devel@nongnu.org>
Julia Suvorova <jusual@mail.ru> Julia Suvorova via Qemu-devel <qemu-devel@nongnu.org>
Justin Terry (VM) <juterry@microsoft.com> Justin Terry (VM) via Qemu-devel <qemu-devel@nongnu.org>
Stefan Weil <sw@weilnetz.de> Stefan Weil via <qemu-devel@nongnu.org>
Andrey Drobyshev <andrey.drobyshev@virtuozzo.com> Andrey Drobyshev via <qemu-block@nongnu.org>
BALATON Zoltan <balaton@eik.bme.hu> BALATON Zoltan via <qemu-ppc@nongnu.org>
# Next, replace old addresses by a more recent one.
Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> <aleksandar.markovic@mips.com>

View File

@ -416,7 +416,8 @@ static void coroutine_fn GRAPH_RDLOCK nbd_reconnect_attempt(BDRVNBDState *s)
reconnect_delay_timer_del(s);
}
static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t cookie)
static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t cookie,
Error **errp)
{
int ret;
uint64_t ind = COOKIE_TO_INDEX(cookie), ind2;
@ -457,20 +458,25 @@ static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t cookie)
/* We are under mutex and cookie is 0. We have to do the dirty work. */
assert(s->reply.cookie == 0);
ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, NULL);
if (ret <= 0) {
ret = ret ? ret : -EIO;
ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, s->info.mode, errp);
if (ret == 0) {
ret = -EIO;
error_setg(errp, "server dropped connection");
}
if (ret < 0) {
nbd_channel_error(s, ret);
return ret;
}
if (nbd_reply_is_structured(&s->reply) &&
s->info.mode < NBD_MODE_STRUCTURED) {
nbd_channel_error(s, -EINVAL);
error_setg(errp, "unexpected structured reply");
return -EINVAL;
}
ind2 = COOKIE_TO_INDEX(s->reply.cookie);
if (ind2 >= MAX_NBD_REQUESTS || !s->requests[ind2].coroutine) {
nbd_channel_error(s, -EINVAL);
error_setg(errp, "unexpected cookie value");
return -EINVAL;
}
if (s->reply.cookie == cookie) {
@ -609,13 +615,17 @@ static int nbd_parse_offset_hole_payload(BDRVNBDState *s,
*/
static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
NBDStructuredReplyChunk *chunk,
uint8_t *payload, uint64_t orig_length,
NBDExtent32 *extent, Error **errp)
uint8_t *payload, bool wide,
uint64_t orig_length,
NBDExtent64 *extent, Error **errp)
{
uint32_t context_id;
uint32_t count;
size_t ext_len = wide ? sizeof(*extent) : sizeof(NBDExtent32);
size_t pay_len = sizeof(context_id) + wide * sizeof(count) + ext_len;
/* The server succeeded, so it must have sent [at least] one extent */
if (chunk->length < sizeof(context_id) + sizeof(*extent)) {
if (chunk->length < pay_len) {
error_setg(errp, "Protocol error: invalid payload for "
"NBD_REPLY_TYPE_BLOCK_STATUS");
return -EINVAL;
@ -630,8 +640,15 @@ static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
return -EINVAL;
}
extent->length = payload_advance32(&payload);
extent->flags = payload_advance32(&payload);
if (wide) {
count = payload_advance32(&payload);
extent->length = payload_advance64(&payload);
extent->flags = payload_advance64(&payload);
} else {
count = 0;
extent->length = payload_advance32(&payload);
extent->flags = payload_advance32(&payload);
}
if (extent->length == 0) {
error_setg(errp, "Protocol error: server sent status chunk with "
@ -652,7 +669,7 @@ static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
* (always a safe status, even if it loses information).
*/
if (s->info.min_block && !QEMU_IS_ALIGNED(extent->length,
s->info.min_block)) {
s->info.min_block)) {
trace_nbd_parse_blockstatus_compliance("extent length is unaligned");
if (extent->length > s->info.min_block) {
extent->length = QEMU_ALIGN_DOWN(extent->length,
@ -666,13 +683,15 @@ static int nbd_parse_blockstatus_payload(BDRVNBDState *s,
/*
* We used NBD_CMD_FLAG_REQ_ONE, so the server should not have
* sent us any more than one extent, nor should it have included
* status beyond our request in that extent. However, it's easy
* enough to ignore the server's noncompliance without killing the
* status beyond our request in that extent. Furthermore, a wide
* server should have replied with an accurate count (we left
* count at 0 for a narrow server). However, it's easy enough to
* ignore the server's noncompliance without killing the
* connection; just ignore trailing extents, and clamp things to
* the length of our request.
*/
if (chunk->length > sizeof(context_id) + sizeof(*extent)) {
trace_nbd_parse_blockstatus_compliance("more than one extent");
if (count != wide || chunk->length > pay_len) {
trace_nbd_parse_blockstatus_compliance("unexpected extent count");
}
if (extent->length > orig_length) {
extent->length = orig_length;
@ -842,9 +861,9 @@ static coroutine_fn int nbd_co_do_receive_one_chunk(
}
*request_ret = 0;
ret = nbd_receive_replies(s, cookie);
ret = nbd_receive_replies(s, cookie, errp);
if (ret < 0) {
error_setg(errp, "Connection closed");
error_prepend(errp, "Connection closed: ");
return -EIO;
}
assert(s->ioc);
@ -1118,7 +1137,7 @@ nbd_co_receive_cmdread_reply(BDRVNBDState *s, uint64_t cookie,
static int coroutine_fn
nbd_co_receive_blockstatus_reply(BDRVNBDState *s, uint64_t cookie,
uint64_t length, NBDExtent32 *extent,
uint64_t length, NBDExtent64 *extent,
int *request_ret, Error **errp)
{
NBDReplyChunkIter iter;
@ -1131,11 +1150,17 @@ nbd_co_receive_blockstatus_reply(BDRVNBDState *s, uint64_t cookie,
NBD_FOREACH_REPLY_CHUNK(s, iter, cookie, false, NULL, &reply, &payload) {
int ret;
NBDStructuredReplyChunk *chunk = &reply.structured;
bool wide;
assert(nbd_reply_is_structured(&reply));
switch (chunk->type) {
case NBD_REPLY_TYPE_BLOCK_STATUS_EXT:
case NBD_REPLY_TYPE_BLOCK_STATUS:
wide = chunk->type == NBD_REPLY_TYPE_BLOCK_STATUS_EXT;
if ((s->info.mode >= NBD_MODE_EXTENDED) != wide) {
trace_nbd_extended_headers_compliance("block_status");
}
if (received) {
nbd_channel_error(s, -EINVAL);
error_setg(&local_err, "Several BLOCK_STATUS chunks in reply");
@ -1143,9 +1168,9 @@ nbd_co_receive_blockstatus_reply(BDRVNBDState *s, uint64_t cookie,
}
received = true;
ret = nbd_parse_blockstatus_payload(s, &reply.structured,
payload, length, extent,
&local_err);
ret = nbd_parse_blockstatus_payload(
s, &reply.structured, payload, wide,
length, extent, &local_err);
if (ret < 0) {
nbd_channel_error(s, ret);
nbd_iter_channel_error(&iter, ret, &local_err);
@ -1375,7 +1400,7 @@ static int coroutine_fn GRAPH_RDLOCK nbd_client_co_block_status(
int64_t *pnum, int64_t *map, BlockDriverState **file)
{
int ret, request_ret;
NBDExtent32 extent = { 0 };
NBDExtent64 extent = { 0 };
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
Error *local_err = NULL;
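
For reference, the two chunk payload layouts that nbd_parse_blockstatus_payload() now distinguishes via the 'wide' flag look roughly like this. This is a sketch, not part of the patch; the payload struct names are illustrative, while NBDExtent32/NBDExtent64 mirror the extent types used in the hunks above:

#include <stdint.h>

typedef struct NBDExtent32 { uint32_t length; uint32_t flags; } NBDExtent32;
typedef struct NBDExtent64 { uint64_t length; uint64_t flags; } NBDExtent64;

/* NBD_REPLY_TYPE_BLOCK_STATUS payload (narrow, big-endian on the wire) */
typedef struct {
    uint32_t context_id;
    NBDExtent32 extents[];      /* 32-bit length/flags per extent */
} NarrowBlockStatusPayload;     /* illustrative name */

/* NBD_REPLY_TYPE_BLOCK_STATUS_EXT payload (wide, big-endian on the wire) */
typedef struct {
    uint32_t context_id;
    uint32_t count;             /* extent count claimed by the server */
    NBDExtent64 extents[];      /* 64-bit length/flags per extent */
} WideBlockStatusPayload;       /* illustrative name */

With NBD_CMD_FLAG_REQ_ONE the client expects exactly one extent, so a compliant wide server reports count == 1 while count stays 0 for a narrow server; that is what the "count != wide" compliance check above encodes.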

View File

@ -166,6 +166,7 @@ iscsi_xcopy(void *src_lun, uint64_t src_off, void *dst_lun, uint64_t dst_off, ui
# nbd.c
nbd_parse_blockstatus_compliance(const char *err) "ignoring extra data from non-compliant server: %s"
nbd_structured_read_compliance(const char *type) "server sent non-compliant unaligned read %s chunk"
nbd_extended_headers_compliance(const char *type) "server sent non-compliant %s chunk not matching choice of extended headers"
nbd_read_reply_entry_fail(int ret, const char *err) "ret = %d, err: %s"
nbd_co_request_fail(uint64_t from, uint64_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name, int ret, const char *err) "Request failed { .from = %" PRIu64", .len = %" PRIu64 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s"
nbd_client_handshake(const char *export_name) "export '%s'"

View File

@ -69,3 +69,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE
NBD_CMD_FLAG_FAST_ZERO
* 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth"
* 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports
* 8.2: NBD_OPT_EXTENDED_HEADERS, NBD_FLAG_BLOCK_STATUS_PAYLOAD

View File

@ -29,6 +29,7 @@
typedef struct NBDExport NBDExport;
typedef struct NBDClient NBDClient;
typedef struct NBDClientConnection NBDClientConnection;
typedef struct NBDMetaContexts NBDMetaContexts;
extern const BlockExportDriver blk_exp_nbd;
@ -76,6 +77,7 @@ typedef struct NBDRequest {
uint16_t flags; /* NBD_CMD_FLAG_* */
uint16_t type; /* NBD_CMD_* */
NBDMode mode; /* Determines which network representation to use */
NBDMetaContexts *contexts; /* Used by NBD_CMD_BLOCK_STATUS */
} NBDRequest;
typedef struct NBDSimpleReply {
@ -389,7 +391,8 @@ int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
Error **errp);
int nbd_send_request(QIOChannel *ioc, NBDRequest *request);
int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
NBDReply *reply, Error **errp);
NBDReply *reply, NBDMode mode,
Error **errp);
int nbd_client(int fd);
int nbd_disconnect(int fd);
int nbd_errno_to_system_errno(int err);
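
The ordered ">=" comparisons against NBD_MODE_STRUCTURED and NBD_MODE_EXTENDED throughout these patches rely on NBDMode being a ladder of negotiation levels, each including the capabilities of the previous one. A minimal sketch of that ordering; the enumerators other than NBD_MODE_OLDSTYLE appear in the patches above, and the exact set should be treated as illustrative:

typedef enum NBDMode {
    NBD_MODE_OLDSTYLE,     /* oldstyle negotiation, no options at all (assumed) */
    NBD_MODE_EXPORT_NAME,  /* newstyle, but only NBD_OPT_EXPORT_NAME */
    NBD_MODE_SIMPLE,       /* newstyle fixed, simple replies only */
    NBD_MODE_STRUCTURED,   /* NBD_OPT_STRUCTURED_REPLY negotiated */
    NBD_MODE_EXTENDED,     /* NBD_OPT_EXTENDED_HEADERS negotiated */
} NBDMode;

Because the values are ordered, a check such as "client->mode >= NBD_MODE_STRUCTURED" covers both structured and extended clients.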

View File

@ -93,7 +93,7 @@ NBDClientConnection *nbd_client_connection_new(const SocketAddress *saddr,
.do_negotiation = do_negotiation,
.initial_info.request_sizes = true,
.initial_info.mode = NBD_MODE_STRUCTURED,
.initial_info.mode = NBD_MODE_EXTENDED,
.initial_info.base_allocation = true,
.initial_info.x_dirty_bitmap = g_strdup(x_dirty_bitmap),
.initial_info.name = g_strdup(export_name ?: "")

View File

@ -953,15 +953,23 @@ static int nbd_start_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
if (fixedNewStyle) {
int result = 0;
if (max_mode >= NBD_MODE_EXTENDED) {
result = nbd_request_simple_option(ioc,
NBD_OPT_EXTENDED_HEADERS,
false, errp);
if (result) {
return result < 0 ? -EINVAL : NBD_MODE_EXTENDED;
}
}
if (max_mode >= NBD_MODE_STRUCTURED) {
result = nbd_request_simple_option(ioc,
NBD_OPT_STRUCTURED_REPLY,
false, errp);
if (result < 0) {
return -EINVAL;
if (result) {
return result < 0 ? -EINVAL : NBD_MODE_STRUCTURED;
}
}
return result ? NBD_MODE_STRUCTURED : NBD_MODE_SIMPLE;
return NBD_MODE_SIMPLE;
} else {
return NBD_MODE_EXPORT_NAME;
}
@ -1034,6 +1042,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
}
switch (info->mode) {
case NBD_MODE_EXTENDED:
case NBD_MODE_STRUCTURED:
if (base_allocation) {
result = nbd_negotiate_simple_meta_context(ioc, info, errp);
@ -1144,7 +1153,7 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
*info = NULL;
result = nbd_start_negotiate(ioc, tlscreds, hostname, &sioc,
NBD_MODE_STRUCTURED, NULL, errp);
NBD_MODE_EXTENDED, NULL, errp);
if (tlscreds && sioc) {
ioc = sioc;
}
@ -1155,6 +1164,7 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
switch ((NBDMode)result) {
case NBD_MODE_SIMPLE:
case NBD_MODE_STRUCTURED:
case NBD_MODE_EXTENDED:
/* newstyle - use NBD_OPT_LIST to populate array, then try
* NBD_OPT_INFO on each array member. If structured replies
* are enabled, also try NBD_OPT_LIST_META_CONTEXT. */
@ -1191,7 +1201,7 @@ int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
break;
}
if (result == NBD_MODE_STRUCTURED &&
if (result >= NBD_MODE_STRUCTURED &&
nbd_list_meta_contexts(ioc, &array[i], errp) < 0) {
goto out;
}
@ -1346,22 +1356,29 @@ int nbd_disconnect(int fd)
int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
{
uint8_t buf[NBD_REQUEST_SIZE];
uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
size_t len;
assert(request->mode <= NBD_MODE_STRUCTURED); /* TODO handle extended */
assert(request->len <= UINT32_MAX);
trace_nbd_send_request(request->from, request->len, request->cookie,
request->flags, request->type,
nbd_cmd_lookup(request->type));
stl_be_p(buf, NBD_REQUEST_MAGIC);
stw_be_p(buf + 4, request->flags);
stw_be_p(buf + 6, request->type);
stq_be_p(buf + 8, request->cookie);
stq_be_p(buf + 16, request->from);
stl_be_p(buf + 24, request->len);
if (request->mode >= NBD_MODE_EXTENDED) {
stl_be_p(buf, NBD_EXTENDED_REQUEST_MAGIC);
stq_be_p(buf + 24, request->len);
len = NBD_EXTENDED_REQUEST_SIZE;
} else {
assert(request->len <= UINT32_MAX);
stl_be_p(buf, NBD_REQUEST_MAGIC);
stl_be_p(buf + 24, request->len);
len = NBD_REQUEST_SIZE;
}
return nbd_write(ioc, buf, sizeof(buf), NULL);
return nbd_write(ioc, buf, len, NULL);
}
/* nbd_receive_simple_reply
@ -1388,30 +1405,36 @@ static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
return 0;
}
/* nbd_receive_structured_reply_chunk
/* nbd_receive_reply_chunk_header
* Read structured reply chunk except magic field (which should be already
* read).
* read). Normalize into the compact form.
* Payload is not read.
*/
static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
NBDStructuredReplyChunk *chunk,
Error **errp)
static int nbd_receive_reply_chunk_header(QIOChannel *ioc, NBDReply *chunk,
Error **errp)
{
int ret;
size_t len;
uint64_t payload_len;
assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
if (chunk->magic == NBD_STRUCTURED_REPLY_MAGIC) {
len = sizeof(chunk->structured);
} else {
assert(chunk->magic == NBD_EXTENDED_REPLY_MAGIC);
len = sizeof(chunk->extended);
}
ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
len - sizeof(chunk->magic), "structured chunk",
errp);
if (ret < 0) {
return ret;
}
chunk->flags = be16_to_cpu(chunk->flags);
chunk->type = be16_to_cpu(chunk->type);
chunk->cookie = be64_to_cpu(chunk->cookie);
chunk->length = be32_to_cpu(chunk->length);
/* flags, type, and cookie occupy same space between forms */
chunk->structured.flags = be16_to_cpu(chunk->structured.flags);
chunk->structured.type = be16_to_cpu(chunk->structured.type);
chunk->structured.cookie = be64_to_cpu(chunk->structured.cookie);
/*
* Because we use BLOCK_STATUS with REQ_ONE, and cap READ requests
@ -1419,11 +1442,20 @@ static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
* this. Even if we stopped using REQ_ONE, sane servers will cap
* the number of extents they return for block status.
*/
if (chunk->length > NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData)) {
if (chunk->magic == NBD_STRUCTURED_REPLY_MAGIC) {
payload_len = be32_to_cpu(chunk->structured.length);
} else {
/* For now, we are ignoring the extended header offset. */
payload_len = be64_to_cpu(chunk->extended.length);
chunk->magic = NBD_STRUCTURED_REPLY_MAGIC;
}
if (payload_len > NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData)) {
error_setg(errp, "server chunk %" PRIu32 " (%s) payload is too long",
chunk->type, nbd_rep_lookup(chunk->type));
chunk->structured.type,
nbd_rep_lookup(chunk->structured.type));
return -EINVAL;
}
chunk->structured.length = payload_len;
return 0;
}
@ -1470,19 +1502,21 @@ nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size,
/* nbd_receive_reply
*
* Decreases bs->in_flight while waiting for a new reply. This yield is where
* we wait indefinitely and the coroutine must be able to be safely reentered
* for nbd_client_attach_aio_context().
* Wait for a new reply. If this yields, the coroutine must be able to be
* safely reentered for nbd_client_attach_aio_context(). @mode determines
* which reply magic we are expecting, although this normalizes the result
* so that the caller only has to work with compact headers.
*
* Returns 1 on success
* 0 on eof, when no data was read (errp is not set)
* negative errno on failure (errp is set)
* 0 on eof, when no data was read
* negative errno on failure
*/
int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
NBDReply *reply, Error **errp)
NBDReply *reply, NBDMode mode, Error **errp)
{
int ret;
const char *type;
uint32_t expected;
ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp);
if (ret <= 0) {
@ -1491,34 +1525,44 @@ int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
reply->magic = be32_to_cpu(reply->magic);
/* Diagnose but accept wrong-width header */
switch (reply->magic) {
case NBD_SIMPLE_REPLY_MAGIC:
if (mode >= NBD_MODE_EXTENDED) {
trace_nbd_receive_wrong_header(reply->magic,
nbd_mode_lookup(mode));
}
ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
if (ret < 0) {
break;
return ret;
}
trace_nbd_receive_simple_reply(reply->simple.error,
nbd_err_lookup(reply->simple.error),
reply->cookie);
break;
case NBD_STRUCTURED_REPLY_MAGIC:
ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
case NBD_EXTENDED_REPLY_MAGIC:
expected = mode >= NBD_MODE_EXTENDED ? NBD_EXTENDED_REPLY_MAGIC
: NBD_STRUCTURED_REPLY_MAGIC;
if (reply->magic != expected) {
trace_nbd_receive_wrong_header(reply->magic,
nbd_mode_lookup(mode));
}
ret = nbd_receive_reply_chunk_header(ioc, reply, errp);
if (ret < 0) {
break;
return ret;
}
type = nbd_reply_type_lookup(reply->structured.type);
trace_nbd_receive_structured_reply_chunk(reply->structured.flags,
reply->structured.type, type,
reply->structured.cookie,
reply->structured.length);
trace_nbd_receive_reply_chunk_header(reply->structured.flags,
reply->structured.type, type,
reply->structured.cookie,
reply->structured.length);
break;
default:
trace_nbd_receive_wrong_header(reply->magic, nbd_mode_lookup(mode));
error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
return -EINVAL;
}
if (ret < 0) {
return ret;
}
return 1;
}
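
The two reply chunk header shapes that nbd_receive_reply_chunk_header() reads, and then normalizes into the compact form for its callers, carry the following fields. A sketch based on the byte offsets handled above; QEMU's real structs (NBDStructuredReplyChunk and NBDExtendedReplyChunk) are packed, so this shows field order and width rather than in-memory layout:

#include <stdint.h>

/* Compact (structured) chunk header: 20 bytes on the wire */
typedef struct {
    uint32_t magic;    /* NBD_STRUCTURED_REPLY_MAGIC */
    uint16_t flags;    /* e.g. NBD_REPLY_FLAG_DONE on the final chunk */
    uint16_t type;     /* NBD_REPLY_TYPE_* */
    uint64_t cookie;   /* echoes the request cookie */
    uint32_t length;   /* payload bytes that follow */
} CompactChunkHeader;  /* illustrative name */

/* Extended chunk header: 32 bytes on the wire */
typedef struct {
    uint32_t magic;    /* NBD_EXTENDED_REPLY_MAGIC */
    uint16_t flags;
    uint16_t type;
    uint64_t cookie;
    uint64_t offset;   /* echoed request offset; ignored on receive for now */
    uint64_t length;   /* 64-bit payload length, bounds-checked before use */
} ExtendedChunkHeader; /* illustrative name */

After validation, the 64-bit length is stored back into the compact header and the magic is rewritten to NBD_STRUCTURED_REPLY_MAGIC, so the rest of the client code keeps working with a single representation.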

View File

@ -34,8 +34,11 @@
* https://github.com/yoe/nbd/blob/master/doc/proto.md
*/
/* Size of all NBD_OPT_*, without payload */
/* Size of all compact NBD_CMD_*, without payload */
#define NBD_REQUEST_SIZE (4 + 2 + 2 + 8 + 8 + 4)
/* Size of all extended NBD_CMD_*, without payload */
#define NBD_EXTENDED_REQUEST_SIZE (4 + 2 + 2 + 8 + 8 + 8)
/* Size of all NBD_REP_* sent in answer to most NBD_OPT_*, without payload */
#define NBD_REPLY_SIZE (4 + 4 + 8)
/* Size of reply to NBD_OPT_EXPORT_NAME */
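
As a quick sanity check on the arithmetic in those macros (not part of the patch): the only difference between the compact and extended request headers is the final length field widening from 4 to 8 bytes, taking the header from 28 to 32 bytes.

/* Throwaway self-check mirroring the definitions above */
#define NBD_REQUEST_SIZE          (4 + 2 + 2 + 8 + 8 + 4)
#define NBD_EXTENDED_REQUEST_SIZE (4 + 2 + 2 + 8 + 8 + 8)

_Static_assert(NBD_REQUEST_SIZE == 28, "compact NBD_CMD_* header is 28 bytes");
_Static_assert(NBD_EXTENDED_REQUEST_SIZE == 32, "extended NBD_CMD_* header is 32 bytes");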

View File

@ -105,11 +105,13 @@ struct NBDExport {
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
/* NBDExportMetaContexts represents a list of contexts to be exported,
/*
* NBDMetaContexts represents a list of meta contexts in use,
* as selected by NBD_OPT_SET_META_CONTEXT. Also used for
* NBD_OPT_LIST_META_CONTEXT. */
typedef struct NBDExportMetaContexts {
NBDExport *exp;
* NBD_OPT_LIST_META_CONTEXT.
*/
struct NBDMetaContexts {
const NBDExport *exp; /* associated export */
size_t count; /* number of negotiated contexts */
bool base_allocation; /* export base:allocation context (block status) */
bool allocation_depth; /* export qemu:allocation-depth */
@ -117,7 +119,7 @@ typedef struct NBDExportMetaContexts {
* export qemu:dirty-bitmap:<export bitmap name>,
* sized by exp->nr_export_bitmaps
*/
} NBDExportMetaContexts;
};
struct NBDClient {
int refcount;
@ -144,7 +146,7 @@ struct NBDClient {
uint32_t check_align; /* If non-zero, check for aligned client requests */
NBDMode mode;
NBDExportMetaContexts export_meta;
NBDMetaContexts contexts; /* Negotiated meta contexts */
uint32_t opt; /* Current option being negotiated */
uint32_t optlen; /* remaining length of data in ioc for the option being
@ -455,10 +457,10 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
}
static void nbd_check_meta_export(NBDClient *client)
static void nbd_check_meta_export(NBDClient *client, NBDExport *exp)
{
if (client->exp != client->export_meta.exp) {
client->export_meta.count = 0;
if (exp != client->contexts.exp) {
client->contexts.count = 0;
}
}
@ -482,6 +484,10 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
[10 .. 133] reserved (0) [unless no_zeroes]
*/
trace_nbd_negotiate_handle_export_name();
if (client->mode >= NBD_MODE_EXTENDED) {
error_setg(errp, "Extended headers already negotiated");
return -EINVAL;
}
if (client->optlen > NBD_MAX_STRING_SIZE) {
error_setg(errp, "Bad length received");
return -EINVAL;
@ -500,11 +506,15 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
error_setg(errp, "export not found");
return -EINVAL;
}
nbd_check_meta_export(client, client->exp);
myflags = client->exp->nbdflags;
if (client->mode >= NBD_MODE_STRUCTURED) {
myflags |= NBD_FLAG_SEND_DF;
}
if (client->mode >= NBD_MODE_EXTENDED && client->contexts.count) {
myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
}
trace_nbd_negotiate_new_style_size_flags(client->exp->size, myflags);
stq_be_p(buf, client->exp->size);
stw_be_p(buf + 8, myflags);
@ -517,7 +527,6 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes,
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
blk_exp_ref(&client->exp->common);
nbd_check_meta_export(client);
return 0;
}
@ -637,6 +646,9 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
errp, "export '%s' not present",
sane_name);
}
if (client->opt == NBD_OPT_GO) {
nbd_check_meta_export(client, exp);
}
/* Don't bother sending NBD_INFO_NAME unless client requested it */
if (sendname) {
@ -690,6 +702,10 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
if (client->mode >= NBD_MODE_STRUCTURED) {
myflags |= NBD_FLAG_SEND_DF;
}
if (client->mode >= NBD_MODE_EXTENDED &&
(client->contexts.count || client->opt == NBD_OPT_INFO)) {
myflags |= NBD_FLAG_BLOCK_STAT_PAYLOAD;
}
trace_nbd_negotiate_new_style_size_flags(exp->size, myflags);
stq_be_p(buf, exp->size);
stw_be_p(buf + 8, myflags);
@ -725,7 +741,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp)
client->check_align = check_align;
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
blk_exp_ref(&client->exp->common);
nbd_check_meta_export(client);
rc = 1;
}
return rc;
@ -848,7 +863,7 @@ static bool nbd_strshift(const char **str, const char *prefix)
* Handle queries to 'base' namespace. For now, only the base:allocation
* context is available. Return true if @query has been handled.
*/
static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
static bool nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta,
const char *query)
{
if (!nbd_strshift(&query, "base:")) {
@ -868,7 +883,7 @@ static bool nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta,
* and qemu:allocation-depth contexts are available. Return true if @query
* has been handled.
*/
static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
static bool nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta,
const char *query)
{
size_t i;
@ -934,7 +949,7 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta,
* Return -errno on I/O error, 0 if option was completely handled by
* sending a reply about inconsistent lengths, or 1 on success. */
static int nbd_negotiate_meta_query(NBDClient *client,
NBDExportMetaContexts *meta, Error **errp)
NBDMetaContexts *meta, Error **errp)
{
int ret;
g_autofree char *query = NULL;
@ -973,14 +988,14 @@ static int nbd_negotiate_meta_query(NBDClient *client,
* Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT
*
* Return -errno on I/O error, or 0 if option was completely handled. */
static int nbd_negotiate_meta_queries(NBDClient *client,
NBDExportMetaContexts *meta, Error **errp)
static int nbd_negotiate_meta_queries(NBDClient *client, Error **errp)
{
int ret;
g_autofree char *export_name = NULL;
/* Mark unused to work around https://bugs.llvm.org/show_bug.cgi?id=3888 */
g_autofree G_GNUC_UNUSED bool *bitmaps = NULL;
NBDExportMetaContexts local_meta = {0};
NBDMetaContexts local_meta = {0};
NBDMetaContexts *meta;
uint32_t nb_queries;
size_t i;
size_t count = 0;
@ -996,6 +1011,8 @@ static int nbd_negotiate_meta_queries(NBDClient *client,
if (client->opt == NBD_OPT_LIST_META_CONTEXT) {
/* Only change the caller's meta on SET. */
meta = &local_meta;
} else {
meta = &client->contexts;
}
g_free(meta->bitmaps);
@ -1264,6 +1281,10 @@ static int nbd_negotiate_options(NBDClient *client, Error **errp)
case NBD_OPT_STRUCTURED_REPLY:
if (length) {
ret = nbd_reject_length(client, false, errp);
} else if (client->mode >= NBD_MODE_EXTENDED) {
ret = nbd_negotiate_send_rep_err(
client, NBD_REP_ERR_EXT_HEADER_REQD, errp,
"extended headers already negotiated");
} else if (client->mode >= NBD_MODE_STRUCTURED) {
ret = nbd_negotiate_send_rep_err(
client, NBD_REP_ERR_INVALID, errp,
@ -1276,8 +1297,20 @@ static int nbd_negotiate_options(NBDClient *client, Error **errp)
case NBD_OPT_LIST_META_CONTEXT:
case NBD_OPT_SET_META_CONTEXT:
ret = nbd_negotiate_meta_queries(client, &client->export_meta,
errp);
ret = nbd_negotiate_meta_queries(client, errp);
break;
case NBD_OPT_EXTENDED_HEADERS:
if (length) {
ret = nbd_reject_length(client, false, errp);
} else if (client->mode >= NBD_MODE_EXTENDED) {
ret = nbd_negotiate_send_rep_err(
client, NBD_REP_ERR_INVALID, errp,
"extended headers already negotiated");
} else {
ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp);
client->mode = NBD_MODE_EXTENDED;
}
break;
default:
@ -1411,11 +1444,13 @@ nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *request,
Error **errp)
{
uint8_t buf[NBD_REQUEST_SIZE];
uint32_t magic;
uint8_t buf[NBD_EXTENDED_REQUEST_SIZE];
uint32_t magic, expect;
int ret;
size_t size = client->mode >= NBD_MODE_EXTENDED ?
NBD_EXTENDED_REQUEST_SIZE : NBD_REQUEST_SIZE;
ret = nbd_read_eof(client, buf, sizeof(buf), errp);
ret = nbd_read_eof(client, buf, size, errp);
if (ret < 0) {
return ret;
}
@ -1423,13 +1458,21 @@ static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *reque
return -EIO;
}
/* Request
[ 0 .. 3] magic (NBD_REQUEST_MAGIC)
[ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...)
[ 6 .. 7] type (NBD_CMD_READ, ...)
[ 8 .. 15] cookie
[16 .. 23] from
[24 .. 27] len
/*
* Compact request
* [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
* [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...)
* [ 6 .. 7] type (NBD_CMD_READ, ...)
* [ 8 .. 15] cookie
* [16 .. 23] from
* [24 .. 27] len
* Extended request
* [ 0 .. 3] magic (NBD_EXTENDED_REQUEST_MAGIC)
* [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, NBD_CMD_FLAG_PAYLOAD_LEN, ...)
* [ 6 .. 7] type (NBD_CMD_READ, ...)
* [ 8 .. 15] cookie
* [16 .. 23] from
* [24 .. 31] len
*/
magic = ldl_be_p(buf);
@ -1437,13 +1480,20 @@ static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *reque
request->type = lduw_be_p(buf + 6);
request->cookie = ldq_be_p(buf + 8);
request->from = ldq_be_p(buf + 16);
request->len = (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits */
if (client->mode >= NBD_MODE_EXTENDED) {
request->len = ldq_be_p(buf + 24);
expect = NBD_EXTENDED_REQUEST_MAGIC;
} else {
request->len = (uint32_t)ldl_be_p(buf + 24); /* widen 32 to 64 bits */
expect = NBD_REQUEST_MAGIC;
}
trace_nbd_receive_request(magic, request->flags, request->type,
request->from, request->len);
if (magic != NBD_REQUEST_MAGIC) {
error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
if (magic != expect) {
error_setg(errp, "invalid magic (got 0x%" PRIx32 ", expected 0x%"
PRIx32 ")", magic, expect);
return -EINVAL;
}
return 0;
@ -1474,7 +1524,7 @@ void nbd_client_put(NBDClient *client)
QTAILQ_REMOVE(&client->exp->clients, client, next);
blk_exp_unref(&client->exp->common);
}
g_free(client->export_meta.bitmaps);
g_free(client->contexts.bitmaps);
g_free(client);
}
}
@ -1921,8 +1971,6 @@ static inline void set_be_chunk(NBDClient *client, struct iovec *iov,
size_t niov, uint16_t flags, uint16_t type,
NBDRequest *request)
{
/* TODO - handle structured vs. extended replies */
NBDStructuredReplyChunk *chunk = iov->iov_base;
size_t i, length = 0;
for (i = 1; i < niov; i++) {
@ -1930,12 +1978,26 @@ static inline void set_be_chunk(NBDClient *client, struct iovec *iov,
}
assert(length <= NBD_MAX_BUFFER_SIZE + sizeof(NBDStructuredReadData));
iov[0].iov_len = sizeof(*chunk);
stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
stw_be_p(&chunk->flags, flags);
stw_be_p(&chunk->type, type);
stq_be_p(&chunk->cookie, request->cookie);
stl_be_p(&chunk->length, length);
if (client->mode >= NBD_MODE_EXTENDED) {
NBDExtendedReplyChunk *chunk = iov->iov_base;
iov[0].iov_len = sizeof(*chunk);
stl_be_p(&chunk->magic, NBD_EXTENDED_REPLY_MAGIC);
stw_be_p(&chunk->flags, flags);
stw_be_p(&chunk->type, type);
stq_be_p(&chunk->cookie, request->cookie);
stq_be_p(&chunk->offset, request->from);
stq_be_p(&chunk->length, length);
} else {
NBDStructuredReplyChunk *chunk = iov->iov_base;
iov[0].iov_len = sizeof(*chunk);
stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
stw_be_p(&chunk->flags, flags);
stw_be_p(&chunk->type, type);
stq_be_p(&chunk->cookie, request->cookie);
stl_be_p(&chunk->length, length);
}
}
static int coroutine_fn nbd_co_send_chunk_done(NBDClient *client,
@ -2074,20 +2136,24 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
}
typedef struct NBDExtentArray {
NBDExtent32 *extents;
NBDExtent64 *extents;
unsigned int nb_alloc;
unsigned int count;
uint64_t total_length;
bool extended;
bool can_add;
bool converted_to_be;
} NBDExtentArray;
static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc)
static NBDExtentArray *nbd_extent_array_new(unsigned int nb_alloc,
NBDMode mode)
{
NBDExtentArray *ea = g_new0(NBDExtentArray, 1);
assert(mode >= NBD_MODE_STRUCTURED);
ea->nb_alloc = nb_alloc;
ea->extents = g_new(NBDExtent32, nb_alloc);
ea->extents = g_new(NBDExtent64, nb_alloc);
ea->extended = mode >= NBD_MODE_EXTENDED;
ea->can_add = true;
return ea;
@ -2106,15 +2172,36 @@ static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
int i;
assert(!ea->converted_to_be);
assert(ea->extended);
ea->can_add = false;
ea->converted_to_be = true;
for (i = 0; i < ea->count; i++) {
ea->extents[i].flags = cpu_to_be32(ea->extents[i].flags);
ea->extents[i].length = cpu_to_be32(ea->extents[i].length);
ea->extents[i].length = cpu_to_be64(ea->extents[i].length);
ea->extents[i].flags = cpu_to_be64(ea->extents[i].flags);
}
}
/* Further modifications of the array after conversion are abandoned */
static NBDExtent32 *nbd_extent_array_convert_to_narrow(NBDExtentArray *ea)
{
int i;
NBDExtent32 *extents = g_new(NBDExtent32, ea->count);
assert(!ea->converted_to_be);
assert(!ea->extended);
ea->can_add = false;
ea->converted_to_be = true;
for (i = 0; i < ea->count; i++) {
assert((ea->extents[i].length | ea->extents[i].flags) <= UINT32_MAX);
extents[i].length = cpu_to_be32(ea->extents[i].length);
extents[i].flags = cpu_to_be32(ea->extents[i].flags);
}
return extents;
}
/*
* Add extent to NBDExtentArray. If extent can't be added (no available space),
* return -1.
@ -2125,19 +2212,27 @@ static void nbd_extent_array_convert_to_be(NBDExtentArray *ea)
* would result in an incorrect range reported to the client)
*/
static int nbd_extent_array_add(NBDExtentArray *ea,
uint32_t length, uint32_t flags)
uint64_t length, uint32_t flags)
{
assert(ea->can_add);
if (!length) {
return 0;
}
if (!ea->extended) {
assert(length <= UINT32_MAX);
}
/* Extend previous extent if flags are the same */
if (ea->count > 0 && flags == ea->extents[ea->count - 1].flags) {
uint64_t sum = (uint64_t)length + ea->extents[ea->count - 1].length;
uint64_t sum = length + ea->extents[ea->count - 1].length;
if (sum <= UINT32_MAX) {
/*
* sum cannot overflow: the block layer bounds image size at
* 2^63, and ea->extents[].length comes from the block layer.
*/
assert(sum >= length);
if (sum <= UINT32_MAX || ea->extended) {
ea->extents[ea->count - 1].length = sum;
ea->total_length += length;
return 0;
@ -2150,7 +2245,7 @@ static int nbd_extent_array_add(NBDExtentArray *ea,
}
ea->total_length += length;
ea->extents[ea->count] = (NBDExtent32) {.length = length, .flags = flags};
ea->extents[ea->count] = (NBDExtent64) {.length = length, .flags = flags};
ea->count++;
return 0;
@ -2219,20 +2314,39 @@ nbd_co_send_extents(NBDClient *client, NBDRequest *request, NBDExtentArray *ea,
bool last, uint32_t context_id, Error **errp)
{
NBDReply hdr;
NBDStructuredMeta chunk;
struct iovec iov[] = {
{.iov_base = &hdr},
{.iov_base = &chunk, .iov_len = sizeof(chunk)},
{.iov_base = ea->extents, .iov_len = ea->count * sizeof(ea->extents[0])}
};
NBDStructuredMeta meta;
NBDExtendedMeta meta_ext;
g_autofree NBDExtent32 *extents = NULL;
uint16_t type;
struct iovec iov[] = { {.iov_base = &hdr}, {0}, {0} };
nbd_extent_array_convert_to_be(ea);
if (client->mode >= NBD_MODE_EXTENDED) {
type = NBD_REPLY_TYPE_BLOCK_STATUS_EXT;
iov[1].iov_base = &meta_ext;
iov[1].iov_len = sizeof(meta_ext);
stl_be_p(&meta_ext.context_id, context_id);
stl_be_p(&meta_ext.count, ea->count);
nbd_extent_array_convert_to_be(ea);
iov[2].iov_base = ea->extents;
iov[2].iov_len = ea->count * sizeof(ea->extents[0]);
} else {
type = NBD_REPLY_TYPE_BLOCK_STATUS;
iov[1].iov_base = &meta;
iov[1].iov_len = sizeof(meta);
stl_be_p(&meta.context_id, context_id);
extents = nbd_extent_array_convert_to_narrow(ea);
iov[2].iov_base = extents;
iov[2].iov_len = ea->count * sizeof(extents[0]);
}
trace_nbd_co_send_extents(request->cookie, ea->count, context_id,
ea->total_length, last);
set_be_chunk(client, iov, 3, last ? NBD_REPLY_FLAG_DONE : 0,
NBD_REPLY_TYPE_BLOCK_STATUS, request);
stl_be_p(&chunk.context_id, context_id);
set_be_chunk(client, iov, 3, last ? NBD_REPLY_FLAG_DONE : 0, type,
request);
return nbd_co_send_iov(client, iov, 3, errp);
}
@ -2241,13 +2355,14 @@ nbd_co_send_extents(NBDClient *client, NBDRequest *request, NBDExtentArray *ea,
static int
coroutine_fn nbd_co_send_block_status(NBDClient *client, NBDRequest *request,
BlockBackend *blk, uint64_t offset,
uint32_t length, bool dont_fragment,
uint64_t length, bool dont_fragment,
bool last, uint32_t context_id,
Error **errp)
{
int ret;
unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
g_autoptr(NBDExtentArray) ea =
nbd_extent_array_new(nb_extents, client->mode);
if (context_id == NBD_META_ID_BASE_ALLOCATION) {
ret = blockstatus_to_extents(blk, offset, length, ea);
@ -2270,11 +2385,12 @@ static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
int64_t start, dirty_start, dirty_count;
int64_t end = offset + length;
bool full = false;
int64_t bound = es->extended ? INT64_MAX : INT32_MAX;
bdrv_dirty_bitmap_lock(bitmap);
for (start = offset;
bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, INT32_MAX,
bdrv_dirty_bitmap_next_dirty_area(bitmap, start, end, bound,
&dirty_start, &dirty_count);
start = dirty_start + dirty_count)
{
@ -2298,18 +2414,103 @@ static int coroutine_fn nbd_co_send_bitmap(NBDClient *client,
NBDRequest *request,
BdrvDirtyBitmap *bitmap,
uint64_t offset,
uint32_t length, bool dont_fragment,
uint64_t length, bool dont_fragment,
bool last, uint32_t context_id,
Error **errp)
{
unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
g_autoptr(NBDExtentArray) ea =
nbd_extent_array_new(nb_extents, client->mode);
bitmap_to_extents(bitmap, offset, length, ea);
return nbd_co_send_extents(client, request, ea, last, context_id, errp);
}
/*
* nbd_co_block_status_payload_read
* Called when a client wants a subset of negotiated contexts via a
* BLOCK_STATUS payload. Check the payload for valid length and
* contents. On success, return 0 with request updated to effective
* length. If request was invalid but all payload consumed, return 0
* with request->len and request->contexts->count set to 0 (which will
* trigger an appropriate NBD_EINVAL response later on). Return
* negative errno if the payload was not fully consumed.
*/
static int
nbd_co_block_status_payload_read(NBDClient *client, NBDRequest *request,
Error **errp)
{
uint64_t payload_len = request->len;
g_autofree char *buf = NULL;
size_t count, i, nr_bitmaps;
uint32_t id;
if (payload_len > NBD_MAX_BUFFER_SIZE) {
error_setg(errp, "len (%" PRIu64 ") is larger than max len (%u)",
request->len, NBD_MAX_BUFFER_SIZE);
return -EINVAL;
}
assert(client->contexts.exp == client->exp);
nr_bitmaps = client->exp->nr_export_bitmaps;
request->contexts = g_new0(NBDMetaContexts, 1);
request->contexts->exp = client->exp;
if (payload_len % sizeof(uint32_t) ||
payload_len < sizeof(NBDBlockStatusPayload) ||
payload_len > (sizeof(NBDBlockStatusPayload) +
sizeof(id) * client->contexts.count)) {
goto skip;
}
buf = g_malloc(payload_len);
if (nbd_read(client->ioc, buf, payload_len,
"CMD_BLOCK_STATUS data", errp) < 0) {
return -EIO;
}
trace_nbd_co_receive_request_payload_received(request->cookie,
payload_len);
request->contexts->bitmaps = g_new0(bool, nr_bitmaps);
count = (payload_len - sizeof(NBDBlockStatusPayload)) / sizeof(id);
payload_len = 0;
for (i = 0; i < count; i++) {
id = ldl_be_p(buf + sizeof(NBDBlockStatusPayload) + sizeof(id) * i);
if (id == NBD_META_ID_BASE_ALLOCATION) {
if (!client->contexts.base_allocation ||
request->contexts->base_allocation) {
goto skip;
}
request->contexts->base_allocation = true;
} else if (id == NBD_META_ID_ALLOCATION_DEPTH) {
if (!client->contexts.allocation_depth ||
request->contexts->allocation_depth) {
goto skip;
}
request->contexts->allocation_depth = true;
} else {
unsigned idx = id - NBD_META_ID_DIRTY_BITMAP;
if (idx >= nr_bitmaps || !client->contexts.bitmaps[idx] ||
request->contexts->bitmaps[idx]) {
goto skip;
}
request->contexts->bitmaps[idx] = true;
}
}
request->len = ldq_be_p(buf);
request->contexts->count = count;
return 0;
skip:
trace_nbd_co_receive_block_status_payload_compliance(request->from,
request->len);
request->len = request->contexts->count = 0;
return nbd_drop(client->ioc, payload_len, errp);
}
/* nbd_co_receive_request
* Collect a client request. Return 0 if request looks valid, -EIO to drop
* connection right away, -EAGAIN to indicate we were interrupted and the
@ -2322,10 +2523,12 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
Error **errp)
{
NBDClient *client = req->client;
bool extended_with_payload;
bool check_length = false;
bool check_rofs = false;
bool allocate_buffer = false;
unsigned payload_len = 0;
bool payload_okay = false;
uint64_t payload_len = 0;
int valid_flags = NBD_CMD_FLAG_FUA;
int ret;
@ -2338,6 +2541,13 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
trace_nbd_co_receive_request_decode_type(request->cookie, request->type,
nbd_cmd_lookup(request->type));
extended_with_payload = client->mode >= NBD_MODE_EXTENDED &&
request->flags & NBD_CMD_FLAG_PAYLOAD_LEN;
if (extended_with_payload) {
payload_len = request->len;
check_length = true;
}
switch (request->type) {
case NBD_CMD_DISC:
/* Special case: we're going to disconnect without a reply,
@ -2354,6 +2564,15 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
break;
case NBD_CMD_WRITE:
if (client->mode >= NBD_MODE_EXTENDED) {
if (!extended_with_payload) {
/* The client is noncompliant. Trace it, but proceed. */
trace_nbd_co_receive_ext_payload_compliance(request->from,
request->len);
}
valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
}
payload_okay = true;
payload_len = request->len;
check_length = true;
allocate_buffer = true;
@ -2377,6 +2596,18 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
break;
case NBD_CMD_BLOCK_STATUS:
if (extended_with_payload) {
ret = nbd_co_block_status_payload_read(client, request, errp);
if (ret < 0) {
return ret;
}
/* payload now consumed */
check_length = false;
payload_len = 0;
valid_flags |= NBD_CMD_FLAG_PAYLOAD_LEN;
} else {
request->contexts = &client->contexts;
}
valid_flags |= NBD_CMD_FLAG_REQ_ONE;
break;
@ -2395,6 +2626,16 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
request->len, NBD_MAX_BUFFER_SIZE);
return -EINVAL;
}
if (payload_len && !payload_okay) {
/*
* For now, we don't support payloads on other commands; but
* we can keep the connection alive by ignoring the payload.
* We will fail the command later with NBD_EINVAL for the use
* of an unsupported flag (and not for access beyond bounds).
*/
assert(request->type != NBD_CMD_WRITE);
request->len = 0;
}
if (allocate_buffer) {
/* READ, WRITE */
req->data = blk_try_blockalign(client->exp->common.blk,
@ -2405,10 +2646,14 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
}
}
if (payload_len) {
/* WRITE */
assert(req->data);
ret = nbd_read(client->ioc, req->data, payload_len,
"CMD_WRITE data", errp);
if (payload_okay) {
/* WRITE */
assert(req->data);
ret = nbd_read(client->ioc, req->data, payload_len,
"CMD_WRITE data", errp);
} else {
ret = nbd_drop(client->ioc, payload_len, errp);
}
if (ret < 0) {
return -EIO;
}
@ -2463,6 +2708,8 @@ static coroutine_fn int nbd_send_generic_reply(NBDClient *client,
{
if (client->mode >= NBD_MODE_STRUCTURED && ret < 0) {
return nbd_co_send_chunk_error(client, request, -ret, error_msg, errp);
} else if (client->mode >= NBD_MODE_EXTENDED) {
return nbd_co_send_chunk_done(client, request, errp);
} else {
return nbd_co_send_simple_reply(client, request, ret < 0 ? -ret : 0,
NULL, 0, errp);
@ -2604,16 +2851,18 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
"discard failed", errp);
case NBD_CMD_BLOCK_STATUS:
if (!request->len) {
return nbd_send_generic_reply(client, request, -EINVAL,
"need non-zero length", errp);
}
assert(request->len <= UINT32_MAX);
if (client->export_meta.count) {
assert(request->contexts);
assert(client->mode >= NBD_MODE_EXTENDED ||
request->len <= UINT32_MAX);
if (request->contexts->count) {
bool dont_fragment = request->flags & NBD_CMD_FLAG_REQ_ONE;
int contexts_remaining = client->export_meta.count;
int contexts_remaining = request->contexts->count;
if (client->export_meta.base_allocation) {
if (!request->len) {
return nbd_send_generic_reply(client, request, -EINVAL,
"need non-zero length", errp);
}
if (request->contexts->base_allocation) {
ret = nbd_co_send_block_status(client, request,
exp->common.blk,
request->from,
@ -2626,7 +2875,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
}
}
if (client->export_meta.allocation_depth) {
if (request->contexts->allocation_depth) {
ret = nbd_co_send_block_status(client, request,
exp->common.blk,
request->from, request->len,
@ -2639,8 +2888,9 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
}
}
assert(request->contexts->exp == client->exp);
for (i = 0; i < client->exp->nr_export_bitmaps; i++) {
if (!client->export_meta.bitmaps[i]) {
if (!request->contexts->bitmaps[i]) {
continue;
}
ret = nbd_co_send_bitmap(client, request,
@ -2656,6 +2906,10 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
assert(!contexts_remaining);
return 0;
} else if (client->contexts.count) {
return nbd_send_generic_reply(client, request, -EINVAL,
"CMD_BLOCK_STATUS payload not valid",
errp);
} else {
return nbd_send_generic_reply(client, request, -EINVAL,
"CMD_BLOCK_STATUS not negotiated",
@ -2734,13 +2988,19 @@ static coroutine_fn void nbd_trip(void *opaque)
} else {
ret = nbd_handle_request(client, &request, req->data, &local_err);
}
if (request.contexts && request.contexts != &client->contexts) {
assert(request.type == NBD_CMD_BLOCK_STATUS);
g_free(request.contexts->bitmaps);
g_free(request.contexts);
}
if (ret < 0) {
error_prepend(&local_err, "Failed to send reply: ");
goto disconnect;
}
/* We must disconnect after NBD_CMD_WRITE if we did not
* read the payload.
/*
* We must disconnect after NBD_CMD_WRITE or BLOCK_STATUS with
* payload if we did not read the payload.
*/
if (!req->complete) {
error_setg(&local_err, "Request handling failed in intermediate state");
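
The client-supplied payload that nbd_co_block_status_payload_read() parses is an 8-byte effect length followed by one 32-bit metadata context ID per requested context, which is what the ldq_be_p(buf) and ldl_be_p(...) reads above decode. A sketch under that reading; the struct name is illustrative, the corresponding type in the patch is NBDBlockStatusPayload:

#include <stdint.h>

/* NBD_CMD_BLOCK_STATUS payload when NBD_CMD_FLAG_PAYLOAD_LEN is set
 * (big-endian on the wire; request->len gives the payload size in bytes) */
typedef struct {
    uint64_t effect_length;   /* range the status query actually covers */
    /* followed by count * uint32_t metadata context IDs */
} BlockStatusPayloadSketch;

On success the server replaces request->len with effect_length and replies only for the listed contexts; an invalid payload that was nonetheless fully drained instead zeroes request->len and request->contexts->count, so the command later fails with NBD_EINVAL rather than dropping the connection.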

View File

@ -33,7 +33,8 @@ nbd_client_clear_queue(void) "Clearing NBD queue"
nbd_client_clear_socket(void) "Clearing NBD socket"
nbd_send_request(uint64_t from, uint64_t len, uint64_t cookie, uint16_t flags, uint16_t type, const char *name) "Sending request to server: { .from = %" PRIu64", .len = %" PRIu64 ", .cookie = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) }"
nbd_receive_simple_reply(int32_t error, const char *errname, uint64_t cookie) "Got simple reply: { .error = %" PRId32 " (%s), cookie = %" PRIu64" }"
nbd_receive_structured_reply_chunk(uint16_t flags, uint16_t type, const char *name, uint64_t cookie, uint32_t length) "Got structured reply chunk: { flags = 0x%" PRIx16 ", type = %d (%s), cookie = %" PRIu64 ", length = %" PRIu32 " }"
nbd_receive_reply_chunk_header(uint16_t flags, uint16_t type, const char *name, uint64_t cookie, uint32_t length) "Got reply chunk header: { flags = 0x%" PRIx16 ", type = %" PRIu16 " (%s), cookie = %" PRIu64 ", length = %" PRIu32 " }"
nbd_receive_wrong_header(uint32_t magic, const char *mode) "Server sent unexpected magic 0x%" PRIx32 " for negotiated mode %s"
# common.c
nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL"
@ -69,8 +70,10 @@ nbd_co_send_chunk_read(uint64_t cookie, uint64_t offset, void *data, uint64_t si
nbd_co_send_chunk_read_hole(uint64_t cookie, uint64_t offset, uint64_t size) "Send structured read hole reply: cookie = %" PRIu64 ", offset = %" PRIu64 ", len = %" PRIu64
nbd_co_send_extents(uint64_t cookie, unsigned int extents, uint32_t id, uint64_t length, int last) "Send block status reply: cookie = %" PRIu64 ", extents = %u, context = %d (extents cover %" PRIu64 " bytes, last chunk = %d)"
nbd_co_send_chunk_error(uint64_t cookie, int err, const char *errname, const char *msg) "Send structured error reply: cookie = %" PRIu64 ", error = %d (%s), msg = '%s'"
nbd_co_receive_block_status_payload_compliance(uint64_t from, uint64_t len) "client sent unusable block status payload: from=0x%" PRIx64 ", len=0x%" PRIx64
nbd_co_receive_request_decode_type(uint64_t cookie, uint16_t type, const char *name) "Decoding type: cookie = %" PRIu64 ", type = %" PRIu16 " (%s)"
nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload received: cookie = %" PRIu64 ", len = %" PRIu64
nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64
nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32
nbd_trip(void) "Reading request"

View File

@ -219,6 +219,7 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
[NBD_FLAG_SEND_RESIZE_BIT] = "resize",
[NBD_FLAG_SEND_CACHE_BIT] = "cache",
[NBD_FLAG_SEND_FAST_ZERO_BIT] = "fast-zero",
[NBD_FLAG_BLOCK_STAT_PAYLOAD_BIT] = "block-status-payload",
};
printf(" size: %" PRIu64 "\n", list[i].size);
@ -235,6 +236,9 @@ static int qemu_nbd_client_list(SocketAddress *saddr, QCryptoTLSCreds *tls,
printf(" opt block: %u\n", list[i].opt_block);
printf(" max block: %u\n", list[i].max_block);
}
printf(" transaction size: %s\n",
list[i].mode >= NBD_MODE_EXTENDED ?
"64-bit" : "32-bit");
if (list[i].n_contexts) {
printf(" available meta contexts: %d\n", list[i].n_contexts);
for (j = 0; j < list[i].n_contexts; j++) {

View File

@ -83,29 +83,32 @@ exports available: 0
exports available: 3
export: 'n'
size: 4194304
flags: 0x58f ( readonly flush fua df multi cache )
flags: 0x158f ( readonly flush fua df multi cache block-status-payload )
min block: 1
opt block: 4096
max block: 33554432
transaction size: 64-bit
available meta contexts: 2
base:allocation
qemu:dirty-bitmap:b
export: 'n2'
description: some text
size: 4194304
flags: 0xded ( flush fua trim zeroes df multi cache fast-zero )
flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload )
min block: 1
opt block: 4096
max block: 33554432
transaction size: 64-bit
available meta contexts: 2
base:allocation
qemu:dirty-bitmap:b2
export: 'n3'
size: 4194304
flags: 0x58f ( readonly flush fua df multi cache )
flags: 0x158f ( readonly flush fua df multi cache block-status-payload )
min block: 1
opt block: 4096
max block: 33554432
transaction size: 64-bit
available meta contexts: 2
base:allocation
qemu:dirty-bitmap:b3
@ -202,29 +205,32 @@ exports available: 0
exports available: 3
export: 'n'
size: 4194304
flags: 0x58f ( readonly flush fua df multi cache )
flags: 0x158f ( readonly flush fua df multi cache block-status-payload )
min block: 1
opt block: 4096
max block: 33554432
transaction size: 64-bit
available meta contexts: 2
base:allocation
qemu:dirty-bitmap:b
export: 'n2'
description: some text
size: 4194304
flags: 0xded ( flush fua trim zeroes df multi cache fast-zero )
flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload )
min block: 1
opt block: 4096
max block: 33554432
transaction size: 64-bit
available meta contexts: 2
base:allocation
qemu:dirty-bitmap:b2
export: 'n3'
size: 4194304
flags: 0x58f ( readonly flush fua df multi cache )
flags: 0x158f ( readonly flush fua df multi cache block-status-payload )
min block: 1
opt block: 4096
max block: 33554432
transaction size: 64-bit
available meta contexts: 2
base:allocation
qemu:dirty-bitmap:b3

View File

@ -39,6 +39,7 @@ exports available: 1
export: ''
size: 67108864
min block: 1
transaction size: 64-bit
== check TLS fail over TCP with mismatched hostname ==
qemu-img: Could not open 'driver=nbd,host=localhost,port=PORT,tls-creds=tls0': Certificate does not match the hostname localhost
@ -53,6 +54,7 @@ exports available: 1
export: ''
size: 67108864
min block: 1
transaction size: 64-bit
== check TLS with different CA fails ==
qemu-img: Could not open 'driver=nbd,host=127.0.0.1,port=PORT,tls-creds=tls0': The certificate hasn't got a known issuer
@ -83,6 +85,7 @@ exports available: 1
export: ''
size: 67108864
min block: 1
transaction size: 64-bit
== check TLS works over UNIX with PSK ==
image: nbd+unix://?socket=SOCK_DIR/qemu-nbd.sock
@ -93,6 +96,7 @@ exports available: 1
export: ''
size: 67108864
min block: 1
transaction size: 64-bit
== check TLS fails over UNIX with mismatch PSK ==
qemu-img: Could not open 'driver=nbd,path=SOCK_DIR/qemu-nbd.sock,tls-creds=tls0': TLS handshake failed: The TLS connection was non-properly terminated.

View File

@ -6,6 +6,7 @@ exports available: 1
export: ''
size: 1024
min block: 1
transaction size: 64-bit
[{ "start": 0, "length": 1000, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
{ "start": 1000, "length": 24, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
1 KiB (0x400) bytes allocated at offset 0 bytes (0x0)
@ -16,6 +17,7 @@ exports available: 1
export: ''
size: 1024
min block: 512
transaction size: 64-bit
[{ "start": 0, "length": 1024, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET}]
1 KiB (0x400) bytes allocated at offset 0 bytes (0x0)
WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw.
@ -28,6 +30,7 @@ exports available: 1
export: ''
size: 1024
min block: 1
transaction size: 64-bit
[{ "start": 0, "length": 1000, "depth": 0, "present": true, "zero": false, "data": true, "compressed": false, "offset": OFFSET},
{ "start": 1000, "length": 24, "depth": 0, "present": true, "zero": true, "data": false, "compressed": false, "offset": OFFSET}]
1 KiB (0x400) bytes allocated at offset 0 bytes (0x0)

View File

@ -15,10 +15,11 @@ wrote 4096/4096 bytes at offset 0
exports available: 1
export: 'fmt'
size: 67108864
flags: 0x58f ( readonly flush fua df multi cache )
flags: 0x158f ( readonly flush fua df multi cache block-status-payload )
min block: XXX
opt block: XXX
max block: XXX
transaction size: 64-bit
available meta contexts: 1
base:allocation
@ -43,10 +44,11 @@ exports available: 1
exports available: 1
export: 'fmt'
size: 67108864
flags: 0x58f ( readonly flush fua df multi cache )
flags: 0x158f ( readonly flush fua df multi cache block-status-payload )
min block: XXX
opt block: XXX
max block: XXX
transaction size: 64-bit
available meta contexts: 1
base:allocation
@ -74,19 +76,21 @@ exports available: 1
exports available: 2
export: 'fmt'
size: 67108864
flags: 0x58f ( readonly flush fua df multi cache )
flags: 0x158f ( readonly flush fua df multi cache block-status-payload )
min block: XXX
opt block: XXX
max block: XXX
transaction size: 64-bit
available meta contexts: 1
base:allocation
export: 'export1'
description: This is the writable second export
size: 67108864
flags: 0xded ( flush fua trim zeroes df multi cache fast-zero )
flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload )
min block: XXX
opt block: XXX
max block: XXX
transaction size: 64-bit
available meta contexts: 1
base:allocation
@ -109,10 +113,11 @@ exports available: 1
export: 'export1'
description: This is the writable second export
size: 67108864
flags: 0xded ( flush fua trim zeroes df multi cache fast-zero )
flags: 0x1ded ( flush fua trim zeroes df multi cache fast-zero block-status-payload )
min block: XXX
opt block: XXX
max block: XXX
transaction size: 64-bit
available meta contexts: 1
base:allocation

View File

@ -17,10 +17,11 @@ wrote 2097152/2097152 bytes at offset 1048576
exports available: 1
export: ''
size: 4194304
flags: 0x48f ( readonly flush fua df cache )
flags: 0x148f ( readonly flush fua df cache block-status-payload )
min block: 1
opt block: 4096
max block: 33554432
transaction size: 64-bit
available meta contexts: 2
base:allocation
qemu:allocation-depth