Migration pull 2022-05-16

(This replaces the 28th April through 10th May sets)
 Compared to that last set it just has the Alpine
 uring check that Leo has added; although that's also
 now fixed upstream in Alpine.
 
 It contains:
   TLS test fixes from Dan
   Zerocopy migration feature from Leo
 
 Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEERfXHG0oMt/uXep+pBRYzHrxb/ecFAmKCY80ACgkQBRYzHrxb
 /eeEEhAAoUogch7ifxFItr1EA0AU6Sgd3Dcn8wY9pm0NySVg7OcIpk1H++A3CgIh
 bubJSwRmpIxGw+5q5w5OvBukFCGYMlAK7J8k1tZmaqdKS8wD0ZwhpPyqTWd14Q/v
 xXSGOQfHMMvbBILiXPjSkfNw8yKJhZr+lW39uMz/kZRwZUmTcrdKAT3Q8PW+1DI9
 v3mNoFNXqtDlHcQ4nQ1TGk/RDO6oXDlTJwdnjoJT3Dopf8Jhl2etvZgVk2kOf4i5
 LmJbSVBr5FNOhJ6P4WL4OEQFOiXXquKdfuGTXIGGhkrW2WkPZulQwB6uO4Gv1wf2
 aj9bLDAFoPxFx2zYS6S/9L6rGeBMcTL9xHCfzyylM6YRjoscRdxXc67PClw71JUy
 regsoSQej0FpmsGx0uuAsDjCELleVIjeYzuQo5OYOP1BCg/5unLIrMgkyQw7COJI
 w+MIZq7IqvUTehU2yXpUGOqPkyDLBlib92dMRgqqG9r9UU7iL3BREbGW4ugW+GM2
 a9k8W9HjyDIIODsdXy1ugPHgjr/arHDAPgYosJMLvjTfdJDcIldAw6CbCcqhCDES
 UOjMVN9VS+716nY2AqvtEHxf47YwqmeRb+tg4SQ0dHLH5Pvfe2bk1sbZiiQpcelt
 Bd88yeBOpcmdzJVur2V4fEZXu5JB/qt/jeJeQa82hS3k93PWm/w=
 =Axhk
 -----END PGP SIGNATURE-----

Merge tag 'pull-migration-20220516a' of https://gitlab.com/dagrh/qemu into staging

Migration pull 2022-05-16

(This replaces the 28th April through 10th May sets)
Compared to the last set, it only adds the Alpine io_uring
build check that Leo wrote, although the underlying problem
is now also fixed upstream in Alpine.

It contains:
  TLS test fixes from Dan
  Zerocopy migration feature from Leo

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEERfXHG0oMt/uXep+pBRYzHrxb/ecFAmKCY80ACgkQBRYzHrxb
# /eeEEhAAoUogch7ifxFItr1EA0AU6Sgd3Dcn8wY9pm0NySVg7OcIpk1H++A3CgIh
# bubJSwRmpIxGw+5q5w5OvBukFCGYMlAK7J8k1tZmaqdKS8wD0ZwhpPyqTWd14Q/v
# xXSGOQfHMMvbBILiXPjSkfNw8yKJhZr+lW39uMz/kZRwZUmTcrdKAT3Q8PW+1DI9
# v3mNoFNXqtDlHcQ4nQ1TGk/RDO6oXDlTJwdnjoJT3Dopf8Jhl2etvZgVk2kOf4i5
# LmJbSVBr5FNOhJ6P4WL4OEQFOiXXquKdfuGTXIGGhkrW2WkPZulQwB6uO4Gv1wf2
# aj9bLDAFoPxFx2zYS6S/9L6rGeBMcTL9xHCfzyylM6YRjoscRdxXc67PClw71JUy
# regsoSQej0FpmsGx0uuAsDjCELleVIjeYzuQo5OYOP1BCg/5unLIrMgkyQw7COJI
# w+MIZq7IqvUTehU2yXpUGOqPkyDLBlib92dMRgqqG9r9UU7iL3BREbGW4ugW+GM2
# a9k8W9HjyDIIODsdXy1ugPHgjr/arHDAPgYosJMLvjTfdJDcIldAw6CbCcqhCDES
# UOjMVN9VS+716nY2AqvtEHxf47YwqmeRb+tg4SQ0dHLH5Pvfe2bk1sbZiiQpcelt
# Bd88yeBOpcmdzJVur2V4fEZXu5JB/qt/jeJeQa82hS3k93PWm/w=
# =Axhk
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 16 May 2022 07:46:37 AM PDT
# gpg:                using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]

* tag 'pull-migration-20220516a' of https://gitlab.com/dagrh/qemu:
  multifd: Implement zero copy write in multifd migration (multifd-zero-copy)
  multifd: Send header packet without flags if zero-copy-send is enabled
  multifd: multifd_send_sync_main now returns negative on error
  migration: Add migrate_use_tls() helper
  migration: Add zero-copy-send parameter for QMP/HMP for Linux
  QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX
  QIOChannel: Add flags on io_writev and introduce io_flush callback
  meson.build: Fix docker-test-build@alpine when including linux/errqueue.h
  tests: ensure migration status isn't reported as failed
  tests: add multifd migration tests of TLS with x509 credentials
  tests: add multifd migration tests of TLS with PSK credentials
  tests: convert multifd migration tests to use common helper
  tests: convert XBZRLE migration test to use common helper
  tests: add migration tests of TLS with x509 credentials
  tests: add migration tests of TLS with PSK credentials
  tests: add more helper macros for creating TLS x509 certs
  tests: fix encoding of IP addresses in x509 certs

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2022-05-16 12:03:09 -07:00
commit 54b592c427
33 changed files with 1302 additions and 146 deletions

View File

@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc,
ret = qio_channel_writev_full(
ioc, &iov, 1,
fds, nfds, NULL);
fds, nfds, 0, NULL);
if (ret == QIO_CHANNEL_ERR_BLOCK) {
if (offset) {
return offset;

View File

@ -68,7 +68,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp)
}
if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send),
fds, nfds, errp)) {
fds, nfds, 0, errp)) {
ret = true;
} else {
trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds);

View File

@ -47,6 +47,8 @@ struct QIOChannelSocket {
socklen_t localAddrLen;
struct sockaddr_storage remoteAddr;
socklen_t remoteAddrLen;
ssize_t zero_copy_queued;
ssize_t zero_copy_sent;
};

View File

@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
#define QIO_CHANNEL_ERR_BLOCK -2
#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
typedef enum QIOChannelFeature QIOChannelFeature;
/* Optional capabilities of a channel, tested with qio_channel_has_feature(). */
enum QIOChannelFeature {
/* Needed by qio_channel_writev_full() when file descriptors are passed */
QIO_CHANNEL_FEATURE_FD_PASS,
QIO_CHANNEL_FEATURE_SHUTDOWN,
QIO_CHANNEL_FEATURE_LISTEN,
/* Needed for writes that set QIO_CHANNEL_WRITE_FLAG_ZERO_COPY */
QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
};
@ -104,6 +107,7 @@ struct QIOChannelClass {
size_t niov,
int *fds,
size_t nfds,
int flags,
Error **errp);
ssize_t (*io_readv)(QIOChannel *ioc,
const struct iovec *iov,
@ -136,6 +140,8 @@ struct QIOChannelClass {
IOHandler *io_read,
IOHandler *io_write,
void *opaque);
int (*io_flush)(QIOChannel *ioc,
Error **errp);
};
/* General I/O handling functions */
@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
* @niov: the length of the @iov array
* @fds: an array of file handles to send
* @nfds: number of file handles in @fds
* @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
* @errp: pointer to a NULL-initialized error object
*
* Write data to the IO channel, reading it from the
@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
int flags,
Error **errp);
/**
@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
* @niov: the length of the @iov array
* @fds: an array of file handles to send
* @nfds: number of file handles in @fds
* @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
* @errp: pointer to a NULL-initialized error object
*
*
@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
* to be written, yielding from the current coroutine
* if required.
*
* If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags,
* instead of waiting for all requested data to be written,
* this function will wait until it's all queued for writing.
* In this case, if the buffer gets changed between queueing and
* sending, the updated buffer will be sent. If this is not a
* desired behavior, it's suggested to call qio_channel_flush()
* before reusing the buffer.
*
* Returns: 0 if all bytes were written, or -1 on error
*/
@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc,
const struct iovec *iov,
size_t niov,
int *fds, size_t nfds,
Error **errp);
int flags, Error **errp);
/**
* qio_channel_flush:
* @ioc: the channel object
* @errp: pointer to a NULL-initialized error object
*
* Will block until every packet queued with
* qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY
* is sent, or return in case of any error.
*
* If not implemented, acts as a no-op, and returns 0.
*
* Returns -1 if any error is found,
* 1 if every send failed to use zero copy.
* 0 otherwise.
*/
int qio_channel_flush(QIOChannel *ioc,
Error **errp);
#endif /* QIO_CHANNEL_H */

View File

@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
int flags,
Error **errp)
{
QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);

View File

@ -276,6 +276,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
int flags,
Error **errp)
{
QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);

View File

@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
int flags,
Error **errp)
{
QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);

View File

@ -25,6 +25,14 @@
#include "io/channel-watch.h"
#include "trace.h"
#include "qapi/clone-visitor.h"
#ifdef CONFIG_LINUX
#include <linux/errqueue.h>
#include <sys/socket.h>
#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY))
#define QEMU_MSG_ZEROCOPY
#endif
#endif
#define SOCKET_MAX_FDS 16
@ -54,6 +62,8 @@ qio_channel_socket_new(void)
sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
sioc->fd = -1;
sioc->zero_copy_queued = 0;
sioc->zero_copy_sent = 0;
ioc = QIO_CHANNEL(sioc);
qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
@ -153,6 +163,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
return -1;
}
#ifdef QEMU_MSG_ZEROCOPY
int ret, v = 1;
ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v));
if (ret == 0) {
/* Zero copy available on host */
qio_channel_set_feature(QIO_CHANNEL(ioc),
QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY);
}
#endif
return 0;
}
@ -524,6 +544,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
int flags,
Error **errp)
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@ -532,6 +553,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
size_t fdsize = sizeof(int) * nfds;
struct cmsghdr *cmsg;
int sflags = 0;
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
@ -556,15 +578,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
memcpy(CMSG_DATA(cmsg), fds, fdsize);
}
#ifdef QEMU_MSG_ZEROCOPY
if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
sflags = MSG_ZEROCOPY;
}
#endif
retry:
ret = sendmsg(sioc->fd, &msg, 0);
ret = sendmsg(sioc->fd, &msg, sflags);
if (ret <= 0) {
if (errno == EAGAIN) {
switch (errno) {
case EAGAIN:
return QIO_CHANNEL_ERR_BLOCK;
}
if (errno == EINTR) {
case EINTR:
goto retry;
#ifdef QEMU_MSG_ZEROCOPY
case ENOBUFS:
if (sflags & MSG_ZEROCOPY) {
error_setg_errno(errp, errno,
"Process can't lock enough memory for using MSG_ZEROCOPY");
return -1;
}
break;
#endif
}
error_setg_errno(errp, errno,
"Unable to write to socket");
return -1;
@ -619,6 +657,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
int flags,
Error **errp)
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@ -657,6 +696,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
}
#endif /* WIN32 */
#ifdef QEMU_MSG_ZEROCOPY
/*
 * Wait until every zero-copy write queued on this socket channel has
 * been reported as completed.
 *
 * Completion notifications are read from the socket error queue
 * (MSG_ERRQUEUE) until zero_copy_sent has caught up with
 * zero_copy_queued.  Only IPv4-style notifications (SOL_IP/IP_RECVERR)
 * are accepted; anything else is treated as an error.
 *
 * Returns: -1 on error, with @errp set;
 *           0 if at least one notification reported a send that was
 *             not downgraded to a copy;
 *           1 otherwise (including when nothing was pending).
 */
static int qio_channel_socket_flush(QIOChannel *ioc,
                                    Error **errp)
{
    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
    struct msghdr msg = {};
    struct sock_extended_err *serr;
    struct cmsghdr *cm;
    char control[CMSG_SPACE(sizeof(*serr))];
    int received;
    int ret = 1;

    while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
        /*
         * msg_controllen is value-result: recvmsg() overwrites it with the
         * length actually received, so re-arm the buffer on every pass.
         */
        memset(control, 0, sizeof(control));
        msg.msg_control = control;
        msg.msg_controllen = sizeof(control);

        received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
        if (received < 0) {
            switch (errno) {
            case EAGAIN:
                /* Nothing on errqueue, wait until something is available */
                qio_channel_wait(ioc, G_IO_ERR);
                continue;
            case EINTR:
                continue;
            default:
                error_setg_errno(errp, errno,
                                 "Unable to read errqueue");
                return -1;
            }
        }

        /*
         * CMSG_FIRSTHDR() yields NULL when no control data was delivered.
         * The message is valid only if BOTH level and type match, so it
         * must be rejected when EITHER one differs.
         */
        cm = CMSG_FIRSTHDR(&msg);
        if (!cm ||
            cm->cmsg_level != SOL_IP ||
            cm->cmsg_type != IP_RECVERR) {
            error_setg_errno(errp, EPROTOTYPE,
                             "Wrong cmsg in errqueue");
            return -1;
        }

        serr = (void *) CMSG_DATA(cm);
        if (serr->ee_errno != 0) {
            error_setg_errno(errp, serr->ee_errno,
                             "Error on socket");
            return -1;
        }
        if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
            error_setg_errno(errp, serr->ee_origin,
                             "Error not from zero copy");
            return -1;
        }

        /*
         * No errors: one notification covers the inclusive range
         * [ee_info, ee_data] of finished sendmsg() calls.
         */
        sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;

        /* If any sendmsg() succeeded using zero copy, return 0 at the end */
        if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) {
            ret = 0;
        }
    }

    return ret;
}
#endif /* QEMU_MSG_ZEROCOPY */
static int
qio_channel_socket_set_blocking(QIOChannel *ioc,
bool enabled,
@ -787,6 +894,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
ioc_klass->io_set_delay = qio_channel_socket_set_delay;
ioc_klass->io_create_watch = qio_channel_socket_create_watch;
ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;
#ifdef QEMU_MSG_ZEROCOPY
ioc_klass->io_flush = qio_channel_socket_flush;
#endif
}
static const TypeInfo qio_channel_socket_info = {

View File

@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
int flags,
Error **errp)
{
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);

View File

@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
int flags,
Error **errp)
{
QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);

View File

@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
int flags,
Error **errp)
{
QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
if ((fds || nfds) &&
!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
if (fds || nfds) {
if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
error_setg_errno(errp, EINVAL,
"Channel does not support file descriptor passing");
return -1;
}
if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
error_setg_errno(errp, EINVAL,
"Zero Copy does not support file descriptor passing");
return -1;
}
}
if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) &&
!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
error_setg_errno(errp, EINVAL,
"Channel does not support file descriptor passing");
"Requested Zero Copy feature is not available");
return -1;
}
return klass->io_writev(ioc, iov, niov, fds, nfds, errp);
return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp);
}
@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc,
size_t niov,
Error **errp)
{
return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp);
return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp);
}
int qio_channel_writev_full_all(QIOChannel *ioc,
const struct iovec *iov,
size_t niov,
int *fds, size_t nfds,
Error **errp)
int flags, Error **errp)
{
int ret = -1;
struct iovec *local_iov = g_new(struct iovec, niov);
@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc,
while (nlocal_iov > 0) {
ssize_t len;
len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds,
errp);
len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds,
nfds, flags, errp);
if (len == QIO_CHANNEL_ERR_BLOCK) {
if (qemu_in_coroutine()) {
qio_channel_yield(ioc, G_IO_OUT);
@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc,
size_t niov,
Error **errp)
{
return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp);
return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp);
}
@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc,
Error **errp)
{
struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp);
return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp);
}
@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc,
return klass->io_seek(ioc, offset, whence, errp);
}
/*
 * Forward a flush request to the channel implementation.
 *
 * A channel class without an io_flush callback, or a channel that does
 * not advertise QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, has nothing to
 * flush; 0 is returned for it without calling anything.
 */
int qio_channel_flush(QIOChannel *ioc,
                      Error **errp)
{
    QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);

    if (klass->io_flush &&
        qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
        return klass->io_flush(ioc, errp);
    }

    return 0;
}
static void qio_channel_restart_read(void *opaque)
{

View File

@ -515,12 +515,23 @@ if not get_option('linux_aio').auto() or have_block
required: get_option('linux_aio'),
kwargs: static_kwargs)
endif
# Test program that includes both liburing.h and linux/errqueue.h.
# If it does not link, io_uring support is dropped below even when the
# liburing dependency itself was found.
linux_io_uring_test = '''
#include <liburing.h>
#include <linux/errqueue.h>
int main(void) { return 0; }'''
linux_io_uring = not_found
if not get_option('linux_io_uring').auto() or have_block
linux_io_uring = dependency('liburing', version: '>=0.3',
required: get_option('linux_io_uring'),
method: 'pkg-config', kwargs: static_kwargs)
# Reject liburing when the combined-header test program fails to link
if not cc.links(linux_io_uring_test)
linux_io_uring = not_found
endif
endif
libnfs = not_found
if not get_option('libnfs').auto() or have_block
libnfs = dependency('libnfs', version: '>=1.9.3',
@ -1742,6 +1753,7 @@ config_host_data.set('CONFIG_KEYUTILS', keyutils.found())
config_host_data.set('CONFIG_GETTID', has_gettid)
config_host_data.set('CONFIG_GNUTLS', gnutls.found())
config_host_data.set('CONFIG_GNUTLS_CRYPTO', gnutls_crypto.found())
config_host_data.set('CONFIG_TASN1', tasn1.found())
config_host_data.set('CONFIG_GCRYPT', gcrypt.found())
config_host_data.set('CONFIG_NETTLE', nettle.found())
config_host_data.set('CONFIG_QEMU_PRIVATE_XTS', xts == 'private')

View File

@ -38,8 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc)
trace_migration_set_incoming_channel(
ioc, object_get_typename(OBJECT(ioc)));
if (s->parameters.tls_creds &&
*s->parameters.tls_creds &&
if (migrate_use_tls() &&
!object_dynamic_cast(OBJECT(ioc),
TYPE_QIO_CHANNEL_TLS)) {
migration_tls_channel_process_incoming(s, ioc, &local_err);

View File

@ -910,6 +910,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
params->multifd_zlib_level = s->parameters.multifd_zlib_level;
params->has_multifd_zstd_level = true;
params->multifd_zstd_level = s->parameters.multifd_zstd_level;
#ifdef CONFIG_LINUX
params->has_zero_copy_send = true;
params->zero_copy_send = s->parameters.zero_copy_send;
#endif
params->has_xbzrle_cache_size = true;
params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
params->has_max_postcopy_bandwidth = true;
@ -1493,7 +1497,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
return false;
}
#ifdef CONFIG_LINUX
if (params->zero_copy_send &&
(!migrate_use_multifd() ||
params->multifd_compression != MULTIFD_COMPRESSION_NONE ||
(params->tls_creds && *params->tls_creds))) {
error_setg(errp,
"Zero copy only available for non-compressed non-TLS multifd migration");
return false;
}
#endif
return true;
}
@ -1567,6 +1580,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
if (params->has_multifd_compression) {
dest->multifd_compression = params->multifd_compression;
}
#ifdef CONFIG_LINUX
if (params->has_zero_copy_send) {
dest->zero_copy_send = params->zero_copy_send;
}
#endif
if (params->has_xbzrle_cache_size) {
dest->xbzrle_cache_size = params->xbzrle_cache_size;
}
@ -1679,6 +1697,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
if (params->has_multifd_compression) {
s->parameters.multifd_compression = params->multifd_compression;
}
#ifdef CONFIG_LINUX
if (params->has_zero_copy_send) {
s->parameters.zero_copy_send = params->zero_copy_send;
}
#endif
if (params->has_xbzrle_cache_size) {
s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
xbzrle_cache_resize(params->xbzrle_cache_size, errp);
@ -2563,6 +2586,26 @@ int migrate_multifd_zstd_level(void)
return s->parameters.multifd_zstd_level;
}
#ifdef CONFIG_LINUX
bool migrate_use_zero_copy_send(void)
{
MigrationState *s;
s = migrate_get_current();
return s->parameters.zero_copy_send;
}
#endif
/*
 * Report whether the tls_creds parameter of the current migration
 * state is set: returns 1 when the pointer is non-NULL and its first
 * element is non-zero, 0 otherwise.
 */
int migrate_use_tls(void)
{
    MigrationState *ms = migrate_get_current();

    if (!ms->parameters.tls_creds) {
        return 0;
    }

    return *ms->parameters.tls_creds ? 1 : 0;
}
int migrate_use_xbzrle(void)
{
MigrationState *s;
@ -4206,6 +4249,10 @@ static Property migration_properties[] = {
DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
parameters.multifd_zstd_level,
DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
#ifdef CONFIG_LINUX
DEFINE_PROP_BOOL("zero_copy_send", MigrationState,
parameters.zero_copy_send, false),
#endif
DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
parameters.xbzrle_cache_size,
DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
@ -4303,6 +4350,9 @@ static void migration_instance_init(Object *obj)
params->has_multifd_compression = true;
params->has_multifd_zlib_level = true;
params->has_multifd_zstd_level = true;
#ifdef CONFIG_LINUX
params->has_zero_copy_send = true;
#endif
params->has_xbzrle_cache_size = true;
params->has_max_postcopy_bandwidth = true;
params->has_max_cpu_throttle = true;

View File

@ -375,6 +375,12 @@ MultiFDCompression migrate_multifd_compression(void);
int migrate_multifd_zlib_level(void);
int migrate_multifd_zstd_level(void);
#ifdef CONFIG_LINUX
bool migrate_use_zero_copy_send(void);
#else
#define migrate_use_zero_copy_send() (false)
#endif
int migrate_use_tls(void);
int migrate_use_xbzrle(void);
uint64_t migrate_xbzrle_cache_size(void);
bool migrate_colo_enabled(void);

View File

@ -566,19 +566,34 @@ void multifd_save_cleanup(void)
multifd_send_state = NULL;
}
void multifd_send_sync_main(QEMUFile *f)
int multifd_send_sync_main(QEMUFile *f)
{
int i;
bool flush_zero_copy;
if (!migrate_use_multifd()) {
return;
return 0;
}
if (multifd_send_state->pages->num) {
if (multifd_send_pages(f) < 0) {
error_report("%s: multifd_send_pages fail", __func__);
return;
return -1;
}
}
/*
* When using zero-copy, it's necessary to flush the pages before any of
* the pages can be sent again, so we'll make sure the new version of the
* pages will always arrive _later_ than the old pages.
*
* Currently we achieve this by flushing the zero-copy requested writes
* per ram iteration, but in the future we could potentially optimize it
* to be less frequent, e.g. only after we finished one whole scanning of
* all the dirty bitmaps.
*/
flush_zero_copy = migrate_use_zero_copy_send();
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDSendParams *p = &multifd_send_state->params[i];
@ -589,7 +604,7 @@ void multifd_send_sync_main(QEMUFile *f)
if (p->quit) {
error_report("%s: channel %d has already quit", __func__, i);
qemu_mutex_unlock(&p->mutex);
return;
return -1;
}
p->packet_num = multifd_send_state->packet_num++;
@ -600,6 +615,17 @@ void multifd_send_sync_main(QEMUFile *f)
ram_counters.transferred += p->packet_len;
qemu_mutex_unlock(&p->mutex);
qemu_sem_post(&p->sem);
if (flush_zero_copy && p->c) {
int ret;
Error *err = NULL;
ret = qio_channel_flush(p->c, &err);
if (ret < 0) {
error_report_err(err);
return -1;
}
}
}
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDSendParams *p = &multifd_send_state->params[i];
@ -608,6 +634,8 @@ void multifd_send_sync_main(QEMUFile *f)
qemu_sem_wait(&p->sem_sync);
}
trace_multifd_send_sync_main(multifd_send_state->packet_num);
return 0;
}
static void *multifd_send_thread(void *opaque)
@ -615,6 +643,7 @@ static void *multifd_send_thread(void *opaque)
MultiFDSendParams *p = opaque;
Error *local_err = NULL;
int ret = 0;
bool use_zero_copy_send = migrate_use_zero_copy_send();
trace_multifd_send_thread_start(p->id);
rcu_register_thread();
@ -637,9 +666,14 @@ static void *multifd_send_thread(void *opaque)
if (p->pending_job) {
uint64_t packet_num = p->packet_num;
uint32_t flags = p->flags;
p->iovs_num = 1;
p->normal_num = 0;
if (use_zero_copy_send) {
p->iovs_num = 0;
} else {
p->iovs_num = 1;
}
for (int i = 0; i < p->pages->num; i++) {
p->normal[p->normal_num] = p->pages->offset[i];
p->normal_num++;
@ -663,11 +697,21 @@ static void *multifd_send_thread(void *opaque)
trace_multifd_send(p->id, packet_num, p->normal_num, flags,
p->next_packet_size);
p->iov[0].iov_len = p->packet_len;
p->iov[0].iov_base = p->packet;
if (use_zero_copy_send) {
/* Send header first, without zerocopy */
ret = qio_channel_write_all(p->c, (void *)p->packet,
p->packet_len, &local_err);
if (ret != 0) {
break;
}
} else {
/* Send header using the same writev call */
p->iov[0].iov_len = p->packet_len;
p->iov[0].iov_base = p->packet;
}
ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
&local_err);
ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
0, p->write_flags, &local_err);
if (ret != 0) {
break;
}
@ -782,15 +826,12 @@ static bool multifd_channel_connect(MultiFDSendParams *p,
QIOChannel *ioc,
Error *error)
{
MigrationState *s = migrate_get_current();
trace_multifd_set_outgoing_channel(
ioc, object_get_typename(OBJECT(ioc)),
migrate_get_current()->hostname, error);
if (!error) {
if (s->parameters.tls_creds &&
*s->parameters.tls_creds &&
if (migrate_use_tls() &&
!object_dynamic_cast(OBJECT(ioc),
TYPE_QIO_CHANNEL_TLS)) {
multifd_tls_channel_connect(p, ioc, &error);
@ -898,6 +939,13 @@ int multifd_save_setup(Error **errp)
/* We need one extra place for the packet header */
p->iov = g_new0(struct iovec, page_count + 1);
p->normal = g_new0(ram_addr_t, page_count);
if (migrate_use_zero_copy_send()) {
p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
} else {
p->write_flags = 0;
}
socket_send_channel_create(multifd_new_send_channel_async, p);
}

View File

@ -20,7 +20,7 @@ int multifd_load_cleanup(Error **errp);
bool multifd_recv_all_channels_created(void);
bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
void multifd_recv_sync_main(void);
void multifd_send_sync_main(QEMUFile *f);
int multifd_send_sync_main(QEMUFile *f);
int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
/* Multifd Compression flags */
@ -92,6 +92,8 @@ typedef struct {
uint32_t packet_len;
/* pointer to the packet */
MultiFDPacket_t *packet;
/* multifd flags for sending ram */
int write_flags;
/* multifd flags for each packet */
uint32_t flags;
/* size of the next packet that contains pages */

View File

@ -2909,6 +2909,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
{
RAMState **rsp = opaque;
RAMBlock *block;
int ret;
if (compress_threads_save_setup()) {
return -1;
@ -2943,7 +2944,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
ram_control_before_iterate(f, RAM_CONTROL_SETUP);
ram_control_after_iterate(f, RAM_CONTROL_SETUP);
multifd_send_sync_main(f);
ret = multifd_send_sync_main(f);
if (ret < 0) {
return ret;
}
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
qemu_fflush(f);
@ -3052,7 +3057,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
out:
if (ret >= 0
&& migration_is_setup_or_active(migrate_get_current()->state)) {
multifd_send_sync_main(rs->f);
ret = multifd_send_sync_main(rs->f);
if (ret < 0) {
return ret;
}
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
qemu_fflush(f);
ram_transferred_add(8);
@ -3112,13 +3121,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
ram_control_after_iterate(f, RAM_CONTROL_FINISH);
}
if (ret >= 0) {
multifd_send_sync_main(rs->f);
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
qemu_fflush(f);
if (ret < 0) {
return ret;
}
return ret;
ret = multifd_send_sync_main(rs->f);
if (ret < 0) {
return ret;
}
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
qemu_fflush(f);
return 0;
}
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,

View File

@ -2840,6 +2840,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
int flags,
Error **errp)
{
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);

View File

@ -74,9 +74,17 @@ static void socket_outgoing_migration(QIOTask *task,
if (qio_task_propagate_error(task, &err)) {
trace_migration_socket_outgoing_error(error_get_pretty(err));
} else {
trace_migration_socket_outgoing_connected(data->hostname);
goto out;
}
trace_migration_socket_outgoing_connected(data->hostname);
if (migrate_use_zero_copy_send() &&
!qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
error_setg(&err, "Zero copy send feature not detected in host kernel");
}
out:
migration_channel_connect(data->s, sioc, data->hostname, err);
object_unref(OBJECT(sioc));
}

View File

@ -1309,6 +1309,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p->has_multifd_zstd_level = true;
visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
break;
#ifdef CONFIG_LINUX
case MIGRATION_PARAMETER_ZERO_COPY_SEND:
p->has_zero_copy_send = true;
visit_type_bool(v, param, &p->zero_copy_send, &err);
break;
#endif
case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
p->has_xbzrle_cache_size = true;
if (!visit_type_size(v, param, &cache_size, &err)) {

View File

@ -738,6 +738,13 @@
# will consume more CPU.
# Defaults to 1. (Since 5.0)
#
# @zero-copy-send: Controls behavior on sending memory pages on migration.
# When true, enables a zero-copy mechanism for sending
# memory pages, if host supports it.
# Requires that QEMU be permitted to use locked memory
# for guest RAM pages.
# Defaults to false. (Since 7.1)
#
# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
# aliases for the purpose of dirty bitmap migration. Such
# aliases may for example be the corresponding names on the
@ -777,6 +784,7 @@
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
'multifd-zlib-level' ,'multifd-zstd-level',
{ 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'},
'block-bitmap-mapping' ] }
##
@ -903,6 +911,13 @@
# will consume more CPU.
# Defaults to 1. (Since 5.0)
#
# @zero-copy-send: Controls behavior on sending memory pages on migration.
# When true, enables a zero-copy mechanism for sending
# memory pages, if host supports it.
# Requires that QEMU be permitted to use locked memory
# for guest RAM pages.
# Defaults to false. (Since 7.1)
#
# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
# aliases for the purpose of dirty bitmap migration. Such
# aliases may for example be the corresponding names on the
@ -957,6 +972,7 @@
'*multifd-compression': 'MultiFDCompression',
'*multifd-zlib-level': 'uint8',
'*multifd-zstd-level': 'uint8',
'*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
##
@ -1103,6 +1119,13 @@
# will consume more CPU.
# Defaults to 1. (Since 5.0)
#
# @zero-copy-send: Controls behavior on sending memory pages on migration.
# When true, enables a zero-copy mechanism for sending
# memory pages, if host supports it.
# Requires that QEMU be permitted to use locked memory
# for guest RAM pages.
# Defaults to false. (Since 7.1)
#
# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
# aliases for the purpose of dirty bitmap migration. Such
# aliases may for example be the corresponding names on the
@ -1155,6 +1178,7 @@
'*multifd-compression': 'MultiFDCompression',
'*multifd-zlib-level': 'uint8',
'*multifd-zstd-level': 'uint8',
'*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
##

View File

@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr,
iov.iov_base = (void *)buf;
iov.iov_len = sz;
n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1,
nfds ? &fd : NULL, nfds, errp);
nfds ? &fd : NULL, nfds, 0, errp);
if (n_written <= 0) {
assert(n_written != QIO_CHANNEL_ERR_BLOCK);

View File

@ -273,13 +273,23 @@ endif
tpmemu_files = ['tpm-emu.c', 'tpm-util.c', 'tpm-tests.c']
# Source list for migration-test: always the migration helpers, plus the
# TLS PSK helpers when gnutls is found, plus the x509 helpers when tasn1
# is found as well.
migration_files = [files('migration-helpers.c')]
if gnutls.found()
migration_files += [files('../unit/crypto-tls-psk-helpers.c'), gnutls]
if tasn1.found()
migration_files += [files('../unit/crypto-tls-x509-helpers.c',
'../unit/pkix_asn1_tab.c'), tasn1]
endif
endif
qtests = {
'bios-tables-test': [io, 'boot-sector.c', 'acpi-utils.c', 'tpm-emu.c'],
'cdrom-test': files('boot-sector.c'),
'dbus-vmstate-test': files('migration-helpers.c') + dbus_vmstate1,
'erst-test': files('erst-test.c'),
'ivshmem-test': [rt, '../../contrib/ivshmem-server/ivshmem-server.c'],
'migration-test': files('migration-helpers.c'),
'migration-test': migration_files,
'pxe-test': files('boot-sector.c'),
'qos-test': [chardev, io, qos_test_ss.apply(config_host, strict: false).sources()],
'tpm-crb-swtpm-test': [io, tpmemu_files],

View File

@ -107,6 +107,19 @@ QDict *migrate_query(QTestState *who)
return wait_command(who, "{ 'execute': 'query-migrate' }");
}
/*
 * Run query-migrate on @who and assert that the reported status is not
 * "failed".  When it is, the "error-desc" field is printed to stderr
 * before the assertion fires.
 *
 * Returns the query-migrate response obtained from migrate_query().
 */
QDict *migrate_query_not_failed(QTestState *who)
{
    QDict *reply = migrate_query(who);
    const char *status = qdict_get_str(reply, "status");

    if (g_str_equal(status, "failed")) {
        const char *why = qdict_get_str(reply, "error-desc");

        g_printerr("query-migrate shows failed migration: %s\n", why);
    }

    g_assert(!g_str_equal(status, "failed"));
    return reply;
}
/*
* Note: caller is responsible to free the returned object via
* g_free() after use

View File

@ -27,6 +27,7 @@ G_GNUC_PRINTF(3, 4)
void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...);
QDict *migrate_query(QTestState *who);
QDict *migrate_query_not_failed(QTestState *who);
void wait_for_migration_status(QTestState *who,
const char *goal, const char **ungoals);

File diff suppressed because it is too large Load Diff

View File

@ -24,7 +24,8 @@
#include "crypto-tls-psk-helpers.h"
#include "qemu/sockets.h"
void test_tls_psk_init(const char *pskfile)
static void
test_tls_psk_init_common(const char *pskfile, const char *user, const char *key)
{
FILE *fp;
@ -33,11 +34,22 @@ void test_tls_psk_init(const char *pskfile)
g_critical("Failed to create pskfile %s: %s", pskfile, strerror(errno));
abort();
}
/* Don't do this in real applications! Use psktool. */
fprintf(fp, "qemu:009d5638c40fde0c\n");
fprintf(fp, "%s:%s\n", user, key);
fclose(fp);
}
/*
 * Populate @pskfile with the default test credentials by delegating to
 * test_tls_psk_init_common().
 */
void test_tls_psk_init(const char *pskfile)
{
    /*
     * Test-only credentials. Real applications must never embed a key
     * like this; generate one with psktool instead.
     */
    const char *psk_user = "qemu";
    const char *psk_key = "009d5638c40fde0c";

    test_tls_psk_init_common(pskfile, psk_user, psk_key);
}
/*
 * Populate @pskfile with an alternative key for the same "qemu" user
 * that test_tls_psk_init() uses, by delegating to
 * test_tls_psk_init_common().
 */
void test_tls_psk_init_alt(const char *pskfile)
{
    /*
     * Test-only credentials. Real applications must never embed a key
     * like this; generate one with psktool instead.
     */
    const char *psk_user = "qemu";
    const char *psk_key = "10ffa6a2c42f0388";

    test_tls_psk_init_common(pskfile, psk_user, psk_key);
}
void test_tls_psk_cleanup(const char *pskfile)
{
unlink(pskfile);

View File

@ -24,6 +24,7 @@
#include <gnutls/gnutls.h>
void test_tls_psk_init(const char *keyfile);
void test_tls_psk_init_alt(const char *keyfile);
void test_tls_psk_cleanup(const char *keyfile);
#endif

View File

@ -168,9 +168,19 @@ test_tls_get_ipaddr(const char *addrstr,
hints.ai_flags = AI_NUMERICHOST;
g_assert(getaddrinfo(addrstr, NULL, &hints, &res) == 0);
*datalen = res->ai_addrlen;
*data = g_new(char, *datalen);
memcpy(*data, res->ai_addr, *datalen);
if (res->ai_family == AF_INET) {
struct sockaddr_in *in = (struct sockaddr_in *)res->ai_addr;
*datalen = sizeof(in->sin_addr);
*data = g_new(char, *datalen);
memcpy(*data, &in->sin_addr, *datalen);
} else if (res->ai_family == AF_INET6) {
struct sockaddr_in6 *in = (struct sockaddr_in6 *)res->ai_addr;
*datalen = sizeof(in->sin6_addr);
*data = g_new(char, *datalen);
memcpy(*data, &in->sin6_addr, *datalen);
} else {
g_assert_not_reached();
}
freeaddrinfo(res);
}

View File

@ -26,6 +26,9 @@
#include <libtasn1.h>
#define QCRYPTO_TLS_TEST_CLIENT_NAME "ACME QEMU Client"
#define QCRYPTO_TLS_TEST_CLIENT_HOSTILE_NAME "ACME Hostile Client"
/*
* This contains parameter about how to generate
* certificates.
@ -118,6 +121,56 @@ void test_tls_cleanup(const char *keyfile);
}; \
test_tls_generate_cert(&varname, NULL)
/*
 * TLS_ROOT_REQ_SIMPLE:
 * @varname: name of the QCryptoTLSTestCertReq variable to declare
 * @fname: value stored in the request's .filename field
 *
 * Declares @varname in the enclosing scope as a certificate request with
 * cn "qemu-CA", basic constraints enabled/critical with the CA flag set,
 * and a critical key usage of GNUTLS_KEY_KEY_CERT_SIGN, then passes it to
 * test_tls_generate_cert() with NULL as the second argument.
 *
 * NOTE(review): NULL presumably means "no signing CA" (self-signed);
 * confirm against test_tls_generate_cert().
 *
 * The expansion is a declaration followed by a call, so it can only be
 * used where a declaration is permitted.
 */
# define TLS_ROOT_REQ_SIMPLE(varname, fname) \
QCryptoTLSTestCertReq varname = { \
.filename = fname, \
.cn = "qemu-CA", \
.basicConstraintsEnable = true, \
.basicConstraintsCritical = true, \
.basicConstraintsIsCA = true, \
.keyUsageEnable = true, \
.keyUsageCritical = true, \
.keyUsageValue = GNUTLS_KEY_KEY_CERT_SIGN, \
}; \
test_tls_generate_cert(&varname, NULL)
/*
 * TLS_CERT_REQ_SIMPLE_CLIENT:
 * @varname: name of the QCryptoTLSTestCertReq variable to declare
 * @cavarname: name of an existing request variable whose .crt member is
 *             passed as the second argument of test_tls_generate_cert()
 * @cname: value stored in the request's .cn field
 * @fname: value stored in the request's .filename field
 *
 * Declares @varname in the enclosing scope as a non-CA certificate
 * request with critical key usage (digital signature | key encipherment)
 * and a critical key purpose of GNUTLS_KP_TLS_WWW_CLIENT, then passes it
 * to test_tls_generate_cert() together with @cavarname.crt.
 *
 * The expansion is a declaration followed by a call, so it can only be
 * used where a declaration is permitted.
 */
# define TLS_CERT_REQ_SIMPLE_CLIENT(varname, cavarname, cname, fname) \
QCryptoTLSTestCertReq varname = { \
.filename = fname, \
.cn = cname, \
.basicConstraintsEnable = true, \
.basicConstraintsCritical = true, \
.basicConstraintsIsCA = false, \
.keyUsageEnable = true, \
.keyUsageCritical = true, \
.keyUsageValue = \
GNUTLS_KEY_DIGITAL_SIGNATURE | GNUTLS_KEY_KEY_ENCIPHERMENT, \
.keyPurposeEnable = true, \
.keyPurposeCritical = true, \
.keyPurposeOID1 = GNUTLS_KP_TLS_WWW_CLIENT, \
}; \
test_tls_generate_cert(&varname, cavarname.crt)
/*
 * TLS_CERT_REQ_SIMPLE_SERVER:
 * @varname: name of the QCryptoTLSTestCertReq variable to declare
 * @cavarname: name of an existing request variable whose .crt member is
 *             passed as the second argument of test_tls_generate_cert()
 * @fname: value stored in the request's .filename field
 * @hostname: stored in .altname1; also used as .cn when non-NULL
 * @ipaddr: stored in .ipaddr1; used as .cn when @hostname is NULL
 *
 * Declares @varname in the enclosing scope as a non-CA certificate
 * request with critical key usage (digital signature | key encipherment)
 * and a critical key purpose of GNUTLS_KP_TLS_WWW_SERVER, then passes it
 * to test_tls_generate_cert() together with @cavarname.crt.
 *
 * @hostname and @ipaddr are expanded more than once, so pass expressions
 * without side effects. They are parenthesized inside the conditional so
 * that an expression argument cannot be regrouped by operator precedence.
 *
 * The expansion is a declaration followed by a call, so it can only be
 * used where a declaration is permitted.
 */
# define TLS_CERT_REQ_SIMPLE_SERVER(varname, cavarname, fname,          \
                                    hostname, ipaddr)                   \
    QCryptoTLSTestCertReq varname = {                                   \
        .filename = fname,                                              \
        .cn = (hostname) ? (hostname) : (ipaddr),                       \
        .altname1 = hostname,                                           \
        .ipaddr1 = ipaddr,                                              \
        .basicConstraintsEnable = true,                                 \
        .basicConstraintsCritical = true,                               \
        .basicConstraintsIsCA = false,                                  \
        .keyUsageEnable = true,                                         \
        .keyUsageCritical = true,                                       \
        .keyUsageValue =                                                \
        GNUTLS_KEY_DIGITAL_SIGNATURE | GNUTLS_KEY_KEY_ENCIPHERMENT,     \
        .keyPurposeEnable = true,                                       \
        .keyPurposeCritical = true,                                     \
        .keyPurposeOID1 = GNUTLS_KP_TLS_WWW_SERVER,                     \
    };                                                                  \
    test_tls_generate_cert(&varname, cavarname.crt)
extern const asn1_static_node pkix_asn1_tab[];
#endif

View File

@ -512,12 +512,19 @@ int main(int argc, char **argv)
false, true, "wiki.qemu.org", NULL);
TEST_SESS_REG(altname4, cacertreq.filename,
servercertalt1req.filename, clientcertreq.filename,
false, false, "192.168.122.1", NULL);
TEST_SESS_REG(altname5, cacertreq.filename,
servercertalt1req.filename, clientcertreq.filename,
false, false, "fec0::dead:beaf", NULL);
TEST_SESS_REG(altname6, cacertreq.filename,
servercertalt2req.filename, clientcertreq.filename,
false, true, "qemu.org", NULL);
TEST_SESS_REG(altname5, cacertreq.filename,
TEST_SESS_REG(altname7, cacertreq.filename,
servercertalt2req.filename, clientcertreq.filename,
false, false, "www.qemu.org", NULL);
TEST_SESS_REG(altname6, cacertreq.filename,
TEST_SESS_REG(altname8, cacertreq.filename,
servercertalt2req.filename, clientcertreq.filename,
false, false, "wiki.qemu.org", NULL);

View File

@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void)
G_N_ELEMENTS(iosend),
fdsend,
G_N_ELEMENTS(fdsend),
0,
&error_abort);
qio_channel_readv_full(dst,