Migration pull for 2021-06-09

Yank crash fix from Leo
 RDMA fix from Li
 mptcp support from me
 dirty-rate changes from Hyman and Peter
 
 (Note I've switched to the gitlab I've been using for virtiofs pulls)
 
 Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEERfXHG0oMt/uXep+pBRYzHrxb/ecFAmDA0sEACgkQBRYzHrxb
 /efn9g/+ICIELbh99KRg6GiZ1BxraixpXgNaN6yjWqdHHTa67/pE0p3xHpLuWFnX
 pt4SDCXYWZ1XC3Eq4AMXyQr4yq8tUjsKV7H99WI7e8NvYzNPW7C4MlNkiiRUCxyc
 /SQMrGw6KOByc2PtRBBCW+47aJv1ZaaE09g3s4la6BfT6xeLegncKrxKjWgsxArR
 BhZVhvmDLn3Njp2iMPLGlDU1+gUcCls2DtQ4WebpyJo511BoIDDzZu8qlLNhKQv0
 wCYRB51FsFyZXptELS5NRgPzjbgojp11F6Ugy4qbp/CqMVLBFi78Jq3FSzlLy4IJ
 SRcPGMTxk1grarb/y+ojrQlPn5VU7U2bgRGOTWKW0pav0+5ehRc4To9wMzTQgv83
 tEYwSYCkMBk0Kzr76rxHj39fGnghtsVzd4abXn9Pe5bShebWE1OBriHafDs4fWKV
 MUgxyp0fpBCC1dHYgUDrQyVHWaQt4nDgFw2xlTuZcWEexBRZZEMBl9ugROtKsbqx
 3QXvjn9fw1Vmnf1WPYp80FLwxTGXHi4RHFpIOKzzB5eqf/ggp9YwuuaBMoPhFM2s
 mh4+A9eo4LEaZcDFxEbxAp1WDLrA1ZyKzerySaJXJaCgd9l2i74PJ4pzQq0ZqVsZ
 yRZlbnbn7KrgWqMpqRwiPrxePkwZcTmkPz6i1+BrJlZ8KTCZt4I=
 =eLBm
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-migration-20210609a' into staging

Migration pull for 2021-06-09

Yank crash fix from Leo
RDMA fix from Li
mptcp support from me
dirty-rate changes from Hyman and Peter

(Note I've switched to the gitlab I've been using for virtiofs pulls)

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

# gpg: Signature made Wed 09 Jun 2021 15:40:01 BST
# gpg:                using RSA key 45F5C71B4A0CB7FB977A9FA90516331EBC5BFDE7
# gpg: Good signature from "Dr. David Alan Gilbert (RH2) <dgilbert@redhat.com>" [full]
# Primary key fingerprint: 45F5 C71B 4A0C B7FB 977A  9FA9 0516 331E BC5B FDE7

* remotes/dgilbert-gitlab/tags/pull-migration-20210609a:
  hmp: Add "calc_dirty_rate" and "info dirty_rate" cmds
  migration/dirtyrate: make sample page count configurable
  sockets: Support multipath TCP
  migration/socket: Close the listener at the end
  migration: Add cleanup hook for inwards migration
  io/net-listener: Call the notifier during finalize
  channel-socket: Only set CLOEXEC if we have space for fds
  migration/rdma: Fix cm event use after free
  yank: Unregister function when using TLS migration

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-06-09 16:40:21 +01:00
commit 7fe7fae8b4
18 changed files with 217 additions and 36 deletions

View File

@ -867,3 +867,16 @@ SRST
``info replay``
Display the record/replay information: mode and the current icount.
ERST
{
.name = "dirty_rate",
.args_type = "",
.params = "",
.help = "show dirty rate information",
.cmd = hmp_info_dirty_rate,
},
SRST
``info dirty_rate``
Display the vcpu dirty rate information.
ERST

View File

@ -1727,3 +1727,17 @@ ERST
.flags = "p",
},
SRST
``calc_dirty_rate`` *second*
Start a round of dirty rate measurement with the period specified in *second*.
The result of the dirty rate measurement may be observed with the
``info dirty_rate`` command.
ERST
{
.name = "calc_dirty_rate",
.args_type = "second:l,sample_pages_per_GB:l?",
.params = "second [sample_pages_per_GB]",
.help = "start a round of guest dirty rate measurement",
.cmd = hmp_calc_dirty_rate,
},

View File

@ -129,5 +129,7 @@ void hmp_info_replay(Monitor *mon, const QDict *qdict);
void hmp_replay_break(Monitor *mon, const QDict *qdict);
void hmp_replay_delete_break(Monitor *mon, const QDict *qdict);
void hmp_replay_seek(Monitor *mon, const QDict *qdict);
void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict);
void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict);
#endif

View File

@ -487,15 +487,15 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
#ifdef MSG_CMSG_CLOEXEC
sflags |= MSG_CMSG_CLOEXEC;
#endif
msg.msg_iov = (struct iovec *)iov;
msg.msg_iovlen = niov;
if (fds && nfds) {
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
#ifdef MSG_CMSG_CLOEXEC
sflags |= MSG_CMSG_CLOEXEC;
#endif
}
retry:

View File

@ -122,6 +122,10 @@ static int qio_dns_resolver_lookup_sync_inet(QIODNSResolver *resolver,
.ipv4 = iaddr->ipv4,
.has_ipv6 = iaddr->has_ipv6,
.ipv6 = iaddr->ipv6,
#ifdef IPPROTO_MPTCP
.has_mptcp = iaddr->has_mptcp,
.mptcp = iaddr->mptcp,
#endif
};
(*addrs)[i] = newaddr;

View File

@ -292,6 +292,9 @@ static void qio_net_listener_finalize(Object *obj)
QIONetListener *listener = QIO_NET_LISTENER(obj);
size_t i;
if (listener->io_notify) {
listener->io_notify(listener->io_data);
}
qio_net_listener_disconnect(listener);
for (i = 0; i < listener->nsioc; i++) {

View File

@ -38,18 +38,19 @@ void migration_channel_process_incoming(QIOChannel *ioc)
trace_migration_set_incoming_channel(
ioc, object_get_typename(OBJECT(ioc)));
if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET)) {
yank_register_function(MIGRATION_YANK_INSTANCE,
migration_yank_iochannel,
QIO_CHANNEL(ioc));
}
if (s->parameters.tls_creds &&
*s->parameters.tls_creds &&
!object_dynamic_cast(OBJECT(ioc),
TYPE_QIO_CHANNEL_TLS)) {
migration_tls_channel_process_incoming(s, ioc, &local_err);
} else {
if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) ||
object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) {
yank_register_function(MIGRATION_YANK_INSTANCE,
migration_yank_iochannel,
QIO_CHANNEL(ioc));
}
migration_ioc_process_incoming(ioc, &local_err);
}
@ -76,12 +77,6 @@ void migration_channel_connect(MigrationState *s,
ioc, object_get_typename(OBJECT(ioc)), hostname, error);
if (!error) {
if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET)) {
yank_register_function(MIGRATION_YANK_INSTANCE,
migration_yank_iochannel,
QIO_CHANNEL(ioc));
}
if (s->parameters.tls_creds &&
*s->parameters.tls_creds &&
!object_dynamic_cast(OBJECT(ioc),
@ -99,6 +94,13 @@ void migration_channel_connect(MigrationState *s,
} else {
QEMUFile *f = qemu_fopen_channel_output(ioc);
if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) ||
object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) {
yank_register_function(MIGRATION_YANK_INSTANCE,
migration_yank_iochannel,
QIO_CHANNEL(ioc));
}
qemu_mutex_lock(&s->qemu_file_lock);
s->to_dst_file = f;
qemu_mutex_unlock(&s->qemu_file_lock);

View File

@ -20,6 +20,9 @@
#include "ram.h"
#include "trace.h"
#include "dirtyrate.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qdict.h"
static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
static struct DirtyRateStat DirtyStat;
@ -48,6 +51,12 @@ static bool is_sample_period_valid(int64_t sec)
return true;
}
/*
 * Check that a requested per-GB sample page count lies within the
 * inclusive range [MIN_SAMPLE_PAGE_COUNT, MAX_SAMPLE_PAGE_COUNT].
 *
 * @pages: requested number of pages to sample per GB
 *
 * Returns true when @pages is inside the range, false otherwise.
 */
static bool is_sample_pages_valid(int64_t pages)
{
    if (pages < MIN_SAMPLE_PAGE_COUNT) {
        return false;
    }

    return pages <= MAX_SAMPLE_PAGE_COUNT;
}
static int dirtyrate_set_state(int *state, int old_state, int new_state)
{
assert(new_state < DIRTY_RATE_STATUS__MAX);
@ -72,13 +81,15 @@ static struct DirtyRateInfo *query_dirty_rate_info(void)
info->status = CalculatingState;
info->start_time = DirtyStat.start_time;
info->calc_time = DirtyStat.calc_time;
info->sample_pages = DirtyStat.sample_pages;
trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
return info;
}
static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time,
uint64_t sample_pages)
{
DirtyStat.total_dirty_samples = 0;
DirtyStat.total_sample_count = 0;
@ -86,6 +97,7 @@ static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
DirtyStat.dirty_rate = -1;
DirtyStat.start_time = start_time;
DirtyStat.calc_time = calc_time;
DirtyStat.sample_pages = sample_pages;
}
static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
@ -361,6 +373,7 @@ void *get_dirtyrate_thread(void *arg)
int ret;
int64_t start_time;
int64_t calc_time;
uint64_t sample_pages;
ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
DIRTY_RATE_STATUS_MEASURING);
@ -371,7 +384,8 @@ void *get_dirtyrate_thread(void *arg)
start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
calc_time = config.sample_period_seconds;
init_dirtyrate_stat(start_time, calc_time);
sample_pages = config.sample_pages_per_gigabytes;
init_dirtyrate_stat(start_time, calc_time, sample_pages);
calculate_dirtyrate(config);
@ -383,7 +397,8 @@ void *get_dirtyrate_thread(void *arg)
return NULL;
}
void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages,
int64_t sample_pages, Error **errp)
{
static struct DirtyRateConfig config;
QemuThread thread;
@ -404,6 +419,17 @@ void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
return;
}
if (has_sample_pages) {
if (!is_sample_pages_valid(sample_pages)) {
error_setg(errp, "sample-pages is out of range[%d, %d].",
MIN_SAMPLE_PAGE_COUNT,
MAX_SAMPLE_PAGE_COUNT);
return;
}
} else {
sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
}
/*
* Init calculation state as unstarted.
*/
@ -415,7 +441,7 @@ void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
}
config.sample_period_seconds = calc_time;
config.sample_pages_per_gigabytes = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
config.sample_pages_per_gigabytes = sample_pages;
qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
(void *)&config, QEMU_THREAD_DETACHED);
}
@ -424,3 +450,47 @@ struct DirtyRateInfo *qmp_query_dirty_rate(Error **errp)
{
return query_dirty_rate_info();
}
/*
 * HMP handler for "info dirty_rate": print the state of the dirty rate
 * measurement to the monitor.
 *
 * @mon:   monitor the report is printed to
 * @qdict: command arguments (not read by this handler)
 *
 * Prints the status, start time, sample page count and period, followed
 * by the dirty rate itself, or "(not ready)" while no rate is available.
 */
void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
{
    DirtyRateInfo *info = query_dirty_rate_info();

    monitor_printf(mon, "Status: %s\n",
                   DirtyRateStatus_str(info->status));
    /*
     * The start time is recorded as the realtime clock in ms divided by
     * 1000, i.e. in seconds, so label it with the same unit as the period.
     */
    monitor_printf(mon, "Start Time: %"PRIi64" (sec)\n",
                   info->start_time);
    monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
                   info->sample_pages);
    monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
                   info->calc_time);
    monitor_printf(mon, "Dirty rate: ");
    if (info->has_dirty_rate) {
        monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
    } else {
        monitor_printf(mon, "(not ready)\n");
    }

    /* Release the info object handed back by query_dirty_rate_info(). */
    g_free(info);
}
/*
 * HMP handler for "calc_dirty_rate": start one round of dirty rate
 * measurement by forwarding the arguments to qmp_calc_dirty_rate().
 *
 * @mon:   monitor used for status and error output
 * @qdict: command arguments; "second" is the measurement period and
 *         "sample_pages_per_GB" is the optional sample page count
 */
void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
{
    Error *local_err = NULL;
    int64_t period = qdict_get_try_int(qdict, "second", 0);
    /* -1 doubles as the "argument was not given" marker. */
    int64_t pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);

    if (period == 0) {
        monitor_printf(mon, "Incorrect period length specified!\n");
        return;
    }

    qmp_calc_dirty_rate(period, pages != -1, pages, &local_err);
    if (local_err != NULL) {
        hmp_handle_error(mon, local_err);
        return;
    }

    monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64
                   " seconds\n", period);
    monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n");
}

View File

@ -15,7 +15,6 @@
/*
* Sample 512 pages per GB as default.
* TODO: Make it configurable.
*/
#define DIRTYRATE_DEFAULT_SAMPLE_PAGES 512
@ -35,6 +34,12 @@
#define MIN_FETCH_DIRTYRATE_TIME_SEC 1
#define MAX_FETCH_DIRTYRATE_TIME_SEC 60
/*
* Take 1/16 pages in 1G as the maximum sample page count
*/
#define MIN_SAMPLE_PAGE_COUNT 128
#define MAX_SAMPLE_PAGE_COUNT 16384
struct DirtyRateConfig {
uint64_t sample_pages_per_gigabytes; /* sample pages per GB */
int64_t sample_period_seconds; /* time duration between two sampling */
@ -63,6 +68,7 @@ struct DirtyRateStat {
int64_t dirty_rate; /* dirty rate in MB/s */
int64_t start_time; /* calculation start time in units of second */
int64_t calc_time; /* time duration of two sampling in units of second */
uint64_t sample_pages; /* sample pages per GB */
};
void *get_dirtyrate_thread(void *arg);

View File

@ -280,6 +280,9 @@ void migration_incoming_state_destroy(void)
g_array_free(mis->postcopy_remote_fds, TRUE);
mis->postcopy_remote_fds = NULL;
}
if (mis->transport_cleanup) {
mis->transport_cleanup(mis->transport_data);
}
qemu_event_reset(&mis->main_thread_load_event);

View File

@ -49,6 +49,10 @@ struct PostcopyBlocktimeContext;
struct MigrationIncomingState {
QEMUFile *from_src_file;
/* A hook to allow cleanup at the end of incoming migration */
void *transport_data;
void (*transport_cleanup)(void *data);
/*
* Free at the start of the main state load, set as the main thread finishes
* loading state.

View File

@ -987,7 +987,8 @@ int multifd_load_cleanup(Error **errp)
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDRecvParams *p = &multifd_recv_state->params[i];
if (object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_SOCKET)
if ((object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_SOCKET) ||
object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_TLS))
&& OBJECT(p->c)->ref == 1) {
yank_unregister_function(MIGRATION_YANK_INSTANCE,
migration_yank_iochannel,
@ -1165,6 +1166,11 @@ bool multifd_recv_all_channels_created(void)
return true;
}
if (!multifd_recv_state) {
/* Called before any connections created */
return false;
}
return thread_count == qatomic_read(&multifd_recv_state->count);
}

View File

@ -26,6 +26,7 @@
#include "qemu-file-channel.h"
#include "qemu-file.h"
#include "io/channel-socket.h"
#include "io/channel-tls.h"
#include "qemu/iov.h"
#include "qemu/yank.h"
#include "yank_functions.h"
@ -106,7 +107,8 @@ static int channel_close(void *opaque, Error **errp)
int ret;
QIOChannel *ioc = QIO_CHANNEL(opaque);
ret = qio_channel_close(ioc, errp);
if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET)
if ((object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) ||
object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS))
&& OBJECT(ioc)->ref == 1) {
yank_unregister_function(MIGRATION_YANK_INSTANCE,
migration_yank_iochannel,

View File

@ -1539,16 +1539,20 @@ static int qemu_rdma_wait_comp_channel(RDMAContext *rdma)
if (pfds[1].revents) {
ret = rdma_get_cm_event(rdma->channel, &cm_event);
if (!ret) {
rdma_ack_cm_event(cm_event);
if (ret) {
error_report("failed to get cm event while wait "
"completion channel");
return -EPIPE;
}
error_report("receive cm event while wait comp channel,"
"cm event is %d", cm_event->event);
if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
rdma_ack_cm_event(cm_event);
return -EPIPE;
}
rdma_ack_cm_event(cm_event);
}
break;
@ -3285,7 +3289,6 @@ static void rdma_cm_poll_handler(void *opaque)
error_report("get_cm_event failed %d", errno);
return;
}
rdma_ack_cm_event(cm_event);
if (cm_event->event == RDMA_CM_EVENT_DISCONNECTED ||
cm_event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
@ -3298,12 +3301,14 @@ static void rdma_cm_poll_handler(void *opaque)
rdma->return_path->error_state = -EPIPE;
}
}
rdma_ack_cm_event(cm_event);
if (mis->migration_incoming_co) {
qemu_coroutine_enter(mis->migration_incoming_co);
}
return;
}
rdma_ack_cm_event(cm_event);
}
static int qemu_rdma_accept(RDMAContext *rdma)

View File

@ -126,22 +126,31 @@ static void socket_accept_incoming_migration(QIONetListener *listener,
{
trace_migration_socket_incoming_accepted();
if (migration_has_all_channels()) {
error_report("%s: Extra incoming migration connection; ignoring",
__func__);
return;
}
qio_channel_set_name(QIO_CHANNEL(cioc), "migration-socket-incoming");
migration_channel_process_incoming(QIO_CHANNEL(cioc));
}
static void
socket_incoming_migration_end(void *opaque)
{
QIONetListener *listener = opaque;
if (migration_has_all_channels()) {
/* Close listening socket as it's no longer needed */
qio_net_listener_disconnect(listener);
object_unref(OBJECT(listener));
}
}
static void
socket_start_incoming_migration_internal(SocketAddress *saddr,
Error **errp)
{
QIONetListener *listener = qio_net_listener_new();
MigrationIncomingState *mis = migration_incoming_get_current();
size_t i;
int num = 1;
@ -156,6 +165,9 @@ socket_start_incoming_migration_internal(SocketAddress *saddr,
return;
}
mis->transport_data = listener;
mis->transport_cleanup = socket_incoming_migration_end;
qio_net_listener_set_client_func_full(listener,
socket_accept_incoming_migration,
NULL, NULL,

View File

@ -1740,6 +1740,9 @@
#
# @calc-time: time in units of second for sample dirty pages
#
# @sample-pages: page count per GB for sample dirty pages
# the default value is 512 (since 6.1)
#
# Since: 5.2
#
##
@ -1747,7 +1750,8 @@
'data': {'*dirty-rate': 'int64',
'status': 'DirtyRateStatus',
'start-time': 'int64',
'calc-time': 'int64'} }
'calc-time': 'int64',
'sample-pages': 'uint64'} }
##
# @calc-dirty-rate:
@ -1756,13 +1760,18 @@
#
# @calc-time: time in units of second for sample dirty pages
#
# @sample-pages: page count per GB for sample dirty pages
# the default value is 512 (since 6.1)
#
# Since: 5.2
#
# Example:
# {"command": "calc-dirty-rate", "data": {"calc-time": 1} }
# {"command": "calc-dirty-rate", "data": {"calc-time": 1,
# "sample-pages": 512} }
#
##
{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} }
{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64',
'*sample-pages': 'int'} }
##
# @query-dirty-rate:

View File

@ -57,6 +57,8 @@
# @keep-alive: enable keep-alive when connecting to this socket. Not supported
# for passive sockets. (Since 4.2)
#
# @mptcp: enable multi-path TCP. (Since 6.1)
#
# Since: 1.3
##
{ 'struct': 'InetSocketAddress',
@ -66,7 +68,8 @@
'*to': 'uint16',
'*ipv4': 'bool',
'*ipv6': 'bool',
'*keep-alive': 'bool' } }
'*keep-alive': 'bool',
'*mptcp': { 'type': 'bool', 'if': 'defined(IPPROTO_MPTCP)' } } }
##
# @UnixSocketAddress:

View File

@ -278,6 +278,11 @@ static int inet_listen_saddr(InetSocketAddress *saddr,
/* create socket + bind/listen */
for (e = res; e != NULL; e = e->ai_next) {
#ifdef IPPROTO_MPTCP
if (saddr->has_mptcp && saddr->mptcp) {
e->ai_protocol = IPPROTO_MPTCP;
}
#endif
getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen,
uaddr,INET6_ADDRSTRLEN,uport,32,
NI_NUMERICHOST | NI_NUMERICSERV);
@ -456,6 +461,13 @@ int inet_connect_saddr(InetSocketAddress *saddr, Error **errp)
for (e = res; e != NULL; e = e->ai_next) {
error_free(local_err);
local_err = NULL;
#ifdef IPPROTO_MPTCP
if (saddr->has_mptcp && saddr->mptcp) {
e->ai_protocol = IPPROTO_MPTCP;
}
#endif
sock = inet_connect_addr(saddr, e, &local_err);
if (sock >= 0) {
break;
@ -687,6 +699,17 @@ int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
}
addr->has_keep_alive = true;
}
#ifdef IPPROTO_MPTCP
begin = strstr(optstr, ",mptcp");
if (begin) {
if (inet_parse_flag("mptcp", begin + strlen(",mptcp"),
&addr->mptcp, errp) < 0)
{
return -1;
}
addr->has_mptcp = true;
}
#endif
return 0;
}