From 9cf2bab2edca1e651eef49f2417f8f67bdfe49bb Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 17 Jul 2017 12:09:31 +0100 Subject: [PATCH 01/16] migration/rdma: Fix race on source Fix a race where the destination might try and send the source a WRID_READY before the source has done a post-recv for it. rdma_post_recv has to happen after the qp exists, and we're OK since we've already called qemu_rdma_source_init that calls qemu_alloc_qp. This corresponds to: https://bugzilla.redhat.com/show_bug.cgi?id=1285044 The race can be triggered by adding a few ms wait before this post_recv_control (which was originally due to me turning on loads of debug). Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Message-Id: <20170717110936.23314-2-dgilbert@redhat.com> Signed-off-by: Juan Quintela --- migration/rdma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index c6bc607a03..6111e10c70 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -2365,6 +2365,12 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) caps_to_network(&cap); + ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY); + if (ret) { + ERROR(errp, "posting second control recv"); + goto err_rdma_source_connect; + } + ret = rdma_connect(rdma->cm_id, &conn_param); if (ret) { perror("rdma_connect"); @@ -2405,12 +2411,6 @@ static int qemu_rdma_connect(RDMAContext *rdma, Error **errp) rdma_ack_cm_event(cm_event); - ret = qemu_rdma_post_recv_control(rdma, RDMA_WRID_READY); - if (ret) { - ERROR(errp, "posting second control recv!"); - goto err_rdma_source_connect; - } - rdma->control_ready_expected = 1; rdma->nb_sent = 0; return 0; From 3a0f2ceaedcf70ff79b671f7fb3b7ec8e71f6e1e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 17 Jul 2017 12:09:32 +0100 Subject: [PATCH 02/16] migration: Close file on failed migration load Closing the file before exit on a failure allows the source to cleanup better, especially with RDMA. Partial fix for https://bugs.launchpad.net/qemu/+bug/1545052 Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Message-Id: <20170717110936.23314-3-dgilbert@redhat.com> Signed-off-by: Juan Quintela --- migration/migration.c | 1 + 1 file changed, 1 insertion(+) diff --git a/migration/migration.c b/migration/migration.c index a0db40d364..8552f54ab4 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -348,6 +348,7 @@ static void process_incoming_migration_co(void *opaque) migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); error_report("load of migration failed: %s", strerror(-ret)); + qemu_fclose(mis->from_src_file); exit(EXIT_FAILURE); } mis->bh = qemu_bh_new(process_incoming_migration_bh, mis); From 0b3c15f09715acd78063e720444cc86ac357bab4 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 17 Jul 2017 12:09:33 +0100 Subject: [PATCH 03/16] migration/rdma: fix qemu_rdma_block_for_wrid error paths The two places that 'goto err_block_for_wrid' weren't setting ret and so would end up returning 0 even though we've failed. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Message-Id: <20170717110936.23314-4-dgilbert@redhat.com> Signed-off-by: Juan Quintela --- migration/rdma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index 6111e10c70..59810aec2e 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -1521,14 +1521,16 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, yield_until_fd_readable(rdma->comp_channel->fd); } - if (ibv_get_cq_event(rdma->comp_channel, &cq, &cq_ctx)) { + ret = ibv_get_cq_event(rdma->comp_channel, &cq, &cq_ctx); + if (ret) { perror("ibv_get_cq_event"); goto err_block_for_wrid; } num_cq_events++; - if (ibv_req_notify_cq(cq, 0)) { + ret = -ibv_req_notify_cq(cq, 0); + if (ret) { goto err_block_for_wrid; } @@ -1564,6 +1566,8 @@ err_block_for_wrid: if (num_cq_events) { ibv_ack_cq_events(cq, num_cq_events); } + + rdma->error_state = ret; return ret; } From 9c98cfbe72b21d9d84b9ea8d231bde103b9fb7ae Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 17 Jul 2017 12:09:34 +0100 Subject: [PATCH 04/16] migration/rdma: Allow cancelling while waiting for wrid When waiting for a WRID, if the other side dies we end up waiting for ever with no way to cancel the migration. Cure this by poll()ing the fd first with a timeout and checking error flags and migration state. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Message-Id: <20170717110936.23314-5-dgilbert@redhat.com> Signed-off-by: Juan Quintela --- migration/rdma.c | 59 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index 59810aec2e..0cf55a6d5b 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -1466,6 +1466,56 @@ static uint64_t qemu_rdma_poll(RDMAContext *rdma, uint64_t *wr_id_out, return 0; } +/* Wait for activity on the completion channel. + * Returns 0 on success, none-0 on error. + */ +static int qemu_rdma_wait_comp_channel(RDMAContext *rdma) +{ + /* + * Coroutine doesn't start until migration_fd_process_incoming() + * so don't yield unless we know we're running inside of a coroutine. + */ + if (rdma->migration_started_on_destination) { + yield_until_fd_readable(rdma->comp_channel->fd); + } else { + /* This is the source side, we're in a separate thread + * or destination prior to migration_fd_process_incoming() + * we can't yield; so we have to poll the fd. + * But we need to be able to handle 'cancel' or an error + * without hanging forever. + */ + while (!rdma->error_state && !rdma->received_error) { + GPollFD pfds[1]; + pfds[0].fd = rdma->comp_channel->fd; + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; + /* 0.1s timeout, should be fine for a 'cancel' */ + switch (qemu_poll_ns(pfds, 1, 100 * 1000 * 1000)) { + case 1: /* fd active */ + return 0; + + case 0: /* Timeout, go around again */ + break; + + default: /* Error of some type - + * I don't trust errno from qemu_poll_ns + */ + error_report("%s: poll failed", __func__); + return -EPIPE; + } + + if (migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) { + /* Bail out and let the cancellation happen */ + return -EPIPE; + } + } + } + + if (rdma->received_error) { + return -EPIPE; + } + return rdma->error_state; +} + /* * Block until the next work request has completed. * @@ -1513,12 +1563,9 @@ static int qemu_rdma_block_for_wrid(RDMAContext *rdma, int wrid_requested, } while (1) { - /* - * Coroutine doesn't start until migration_fd_process_incoming() - * so don't yield unless we know we're running inside of a coroutine. - */ - if (rdma->migration_started_on_destination) { - yield_until_fd_readable(rdma->comp_channel->fd); + ret = qemu_rdma_wait_comp_channel(rdma); + if (ret) { + goto err_block_for_wrid; } ret = ibv_get_cq_event(rdma->comp_channel, &cq, &cq_ctx); From 482a33c53cbc9d2b0c47d4df03b659bf50258c21 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 17 Jul 2017 12:09:35 +0100 Subject: [PATCH 05/16] migration/rdma: Safely convert control types control_desc[] is an array of strings that correspond to a series of message types; they're used only for error messages, but if the message type is seriously broken then we could go off the end of the array. Convert the array to a function control_desc() that bound checks. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Message-Id: <20170717110936.23314-6-dgilbert@redhat.com> Signed-off-by: Juan Quintela --- migration/rdma.c | 54 ++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index 0cf55a6d5b..972167d899 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -165,20 +165,6 @@ enum { RDMA_CONTROL_UNREGISTER_FINISHED, /* unpinning finished */ }; -static const char *control_desc[] = { - [RDMA_CONTROL_NONE] = "NONE", - [RDMA_CONTROL_ERROR] = "ERROR", - [RDMA_CONTROL_READY] = "READY", - [RDMA_CONTROL_QEMU_FILE] = "QEMU FILE", - [RDMA_CONTROL_RAM_BLOCKS_REQUEST] = "RAM BLOCKS REQUEST", - [RDMA_CONTROL_RAM_BLOCKS_RESULT] = "RAM BLOCKS RESULT", - [RDMA_CONTROL_COMPRESS] = "COMPRESS", - [RDMA_CONTROL_REGISTER_REQUEST] = "REGISTER REQUEST", - [RDMA_CONTROL_REGISTER_RESULT] = "REGISTER RESULT", - [RDMA_CONTROL_REGISTER_FINISHED] = "REGISTER FINISHED", - [RDMA_CONTROL_UNREGISTER_REQUEST] = "UNREGISTER REQUEST", - [RDMA_CONTROL_UNREGISTER_FINISHED] = "UNREGISTER FINISHED", -}; /* * Memory and MR structures used to represent an IB Send/Recv work request. @@ -251,6 +237,30 @@ typedef struct QEMU_PACKED RDMADestBlock { uint32_t padding; } RDMADestBlock; +static const char *control_desc(unsigned int rdma_control) +{ + static const char *strs[] = { + [RDMA_CONTROL_NONE] = "NONE", + [RDMA_CONTROL_ERROR] = "ERROR", + [RDMA_CONTROL_READY] = "READY", + [RDMA_CONTROL_QEMU_FILE] = "QEMU FILE", + [RDMA_CONTROL_RAM_BLOCKS_REQUEST] = "RAM BLOCKS REQUEST", + [RDMA_CONTROL_RAM_BLOCKS_RESULT] = "RAM BLOCKS RESULT", + [RDMA_CONTROL_COMPRESS] = "COMPRESS", + [RDMA_CONTROL_REGISTER_REQUEST] = "REGISTER REQUEST", + [RDMA_CONTROL_REGISTER_RESULT] = "REGISTER RESULT", + [RDMA_CONTROL_REGISTER_FINISHED] = "REGISTER FINISHED", + [RDMA_CONTROL_UNREGISTER_REQUEST] = "UNREGISTER REQUEST", + [RDMA_CONTROL_UNREGISTER_FINISHED] = "UNREGISTER FINISHED", + }; + + if (rdma_control > RDMA_CONTROL_UNREGISTER_FINISHED) { + return "??BAD CONTROL VALUE??"; + } + + return strs[rdma_control]; +} + static uint64_t htonll(uint64_t v) { union { uint32_t lv[2]; uint64_t llv; } u; @@ -1641,7 +1651,7 @@ static int qemu_rdma_post_send_control(RDMAContext *rdma, uint8_t *buf, .num_sge = 1, }; - trace_qemu_rdma_post_send_control(control_desc[head->type]); + trace_qemu_rdma_post_send_control(control_desc(head->type)); /* * We don't actually need to do a memcpy() in here if we used @@ -1720,16 +1730,16 @@ static int qemu_rdma_exchange_get_response(RDMAContext *rdma, network_to_control((void *) rdma->wr_data[idx].control); memcpy(head, rdma->wr_data[idx].control, sizeof(RDMAControlHeader)); - trace_qemu_rdma_exchange_get_response_start(control_desc[expecting]); + trace_qemu_rdma_exchange_get_response_start(control_desc(expecting)); if (expecting == RDMA_CONTROL_NONE) { - trace_qemu_rdma_exchange_get_response_none(control_desc[head->type], + trace_qemu_rdma_exchange_get_response_none(control_desc(head->type), head->type); } else if (head->type != expecting || head->type == RDMA_CONTROL_ERROR) { error_report("Was expecting a %s (%d) control message" ", but got: %s (%d), length: %d", - control_desc[expecting], expecting, - control_desc[head->type], head->type, head->len); + control_desc(expecting), expecting, + control_desc(head->type), head->type, head->len); if (head->type == RDMA_CONTROL_ERROR) { rdma->received_error = true; } @@ -1839,7 +1849,7 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, } } - trace_qemu_rdma_exchange_send_waiting(control_desc[resp->type]); + trace_qemu_rdma_exchange_send_waiting(control_desc(resp->type)); ret = qemu_rdma_exchange_get_response(rdma, resp, resp->type, RDMA_WRID_DATA); @@ -1851,7 +1861,7 @@ static int qemu_rdma_exchange_send(RDMAContext *rdma, RDMAControlHeader *head, if (resp_idx) { *resp_idx = RDMA_WRID_DATA; } - trace_qemu_rdma_exchange_send_received(control_desc[resp->type]); + trace_qemu_rdma_exchange_send_received(control_desc(resp->type)); } rdma->control_ready_expected = 1; @@ -3401,7 +3411,7 @@ static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque) ret = -EIO; goto out; default: - error_report("Unknown control message %s", control_desc[head.type]); + error_report("Unknown control message %s", control_desc(head.type)); ret = -EIO; goto out; } From 32bce196344772df8d68ab40e2dd1dd57be1aa7c Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 17 Jul 2017 12:09:36 +0100 Subject: [PATCH 06/16] migration/rdma: Send error during cancelling When we issue a cancel and clean up the RDMA channel send a CONTROL_ERROR to get the destination to quit. The rdma_cleanup code waits for the event to come back from the rdma_disconnect; but that wont happen until the destination quits and there's currently nothing to force it. Note this makes the case of a cancel work while the destination is alive, and it already works if the destination is truly dead. Note it doesn't fix the case where the destination is hung (we get stuck waiting for the rdma_disconnect event). Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Message-Id: <20170717110936.23314-7-dgilbert@redhat.com> Signed-off-by: Juan Quintela --- migration/rdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/migration/rdma.c b/migration/rdma.c index 972167d899..ca56594328 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -2269,7 +2269,9 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) int ret, idx; if (rdma->cm_id && rdma->connected) { - if (rdma->error_state && !rdma->received_error) { + if ((rdma->error_state || + migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) && + !rdma->received_error) { RDMAControlHeader head = { .len = 0, .type = RDMA_CONTROL_ERROR, .repeat = 1, From 07d1d063d3235c02f60dc92ec174d419e6f8a750 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 18 Jul 2017 11:39:01 +0800 Subject: [PATCH 07/16] qdev: provide DEFINE_PROP_INT64() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have nearly all the stuff, but this one is missing. Add it in. Am going to use this new helper for MigrationParameters fields, since most of them are int64_t. CC: Markus Armbruster CC: Eduardo Habkost CC: Marc-André Lureau CC: Peter Xu CC: Juan Quintela CC: Marcel Apfelbaum CC: Eric Blake Reviewed-by: Marc-André Lureau Reviewed-by: Marcel Apfelbaum Reviewed-by: Juan Quintela Reviewed-by: Eduardo Habkost Signed-off-by: Peter Xu Message-Id: <1500349150-13240-2-git-send-email-peterx@redhat.com> Signed-off-by: Juan Quintela --- hw/core/qdev-properties.c | 32 ++++++++++++++++++++++++++++++++ include/hw/qdev-properties.h | 3 +++ 2 files changed, 35 insertions(+) diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index dcecdf03e5..c1d4e5480a 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -404,6 +404,31 @@ static void set_uint64(Object *obj, Visitor *v, const char *name, visit_type_uint64(v, name, ptr, errp); } +static void get_int64(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + int64_t *ptr = qdev_get_prop_ptr(dev, prop); + + visit_type_int64(v, name, ptr, errp); +} + +static void set_int64(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + int64_t *ptr = qdev_get_prop_ptr(dev, prop); + + if (dev->realized) { + qdev_prop_set_after_realize(dev, name, errp); + return; + } + + visit_type_int64(v, name, ptr, errp); +} + const PropertyInfo qdev_prop_uint64 = { .name = "uint64", .get = get_uint64, @@ -411,6 +436,13 @@ const PropertyInfo qdev_prop_uint64 = { .set_default_value = set_default_value_uint, }; +const PropertyInfo qdev_prop_int64 = { + .name = "int64", + .get = get_int64, + .set = set_int64, + .set_default_value = set_default_value_int, +}; + /* --- string --- */ static void release_string(Object *obj, const char *name, void *opaque) diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 39297961f3..e2321f1cc1 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -13,6 +13,7 @@ extern const PropertyInfo qdev_prop_uint16; extern const PropertyInfo qdev_prop_uint32; extern const PropertyInfo qdev_prop_int32; extern const PropertyInfo qdev_prop_uint64; +extern const PropertyInfo qdev_prop_int64; extern const PropertyInfo qdev_prop_size; extern const PropertyInfo qdev_prop_string; extern const PropertyInfo qdev_prop_chr; @@ -157,6 +158,8 @@ extern const PropertyInfo qdev_prop_link; DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_int32, int32_t) #define DEFINE_PROP_UINT64(_n, _s, _f, _d) \ DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_uint64, uint64_t) +#define DEFINE_PROP_INT64(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_int64, int64_t) #define DEFINE_PROP_SIZE(_n, _s, _f, _d) \ DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size, uint64_t) #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d) \ From 89632fafdc64927647b6f45416307cd3d4c746db Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 18 Jul 2017 11:39:02 +0800 Subject: [PATCH 08/16] migration: export parameters to props Export migration parameters to qdev properties. Then we can use, for example: -global migration.x-cpu-throttle-initial=xxx To specify migration parameters during init. Prefix "x-" is appended for each parameter exported to show that this is not a stable interface, and only for debugging/testing purpose. Reviewed-by: Juan Quintela Signed-off-by: Peter Xu Message-Id: <1500349150-13240-3-git-send-email-peterx@redhat.com> Signed-off-by: Juan Quintela --- migration/migration.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 8552f54ab4..f5ed3333db 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2010,6 +2010,31 @@ static Property migration_properties[] = { send_configuration, true), DEFINE_PROP_BOOL("send-section-footer", MigrationState, send_section_footer, true), + + /* Migration parameters */ + DEFINE_PROP_INT64("x-compress-level", MigrationState, + parameters.compress_level, + DEFAULT_MIGRATE_COMPRESS_LEVEL), + DEFINE_PROP_INT64("x-compress-threads", MigrationState, + parameters.compress_threads, + DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), + DEFINE_PROP_INT64("x-decompress-threads", MigrationState, + parameters.decompress_threads, + DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), + DEFINE_PROP_INT64("x-cpu-throttle-initial", MigrationState, + parameters.cpu_throttle_initial, + DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), + DEFINE_PROP_INT64("x-cpu-throttle-increment", MigrationState, + parameters.cpu_throttle_increment, + DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), + DEFINE_PROP_INT64("x-max-bandwidth", MigrationState, + parameters.max_bandwidth, MAX_THROTTLE), + DEFINE_PROP_INT64("x-downtime-limit", MigrationState, + parameters.downtime_limit, + DEFAULT_MIGRATE_SET_DOWNTIME), + DEFINE_PROP_INT64("x-checkpoint-delay", MigrationState, + parameters.x_checkpoint_delay, + DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), DEFINE_PROP_END_OF_LIST(), }; @@ -2028,16 +2053,6 @@ static void migration_instance_init(Object *obj) ms->state = MIGRATION_STATUS_NONE; ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE; ms->mbps = -1; - ms->parameters = (MigrationParameters) { - .compress_level = DEFAULT_MIGRATE_COMPRESS_LEVEL, - .compress_threads = DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT, - .decompress_threads = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT, - .cpu_throttle_initial = DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL, - .cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT, - .max_bandwidth = MAX_THROTTLE, - .downtime_limit = DEFAULT_MIGRATE_SET_DOWNTIME, - .x_checkpoint_delay = DEFAULT_MIGRATE_X_CHECKPOINT_DELAY, - }; ms->parameters.tls_creds = g_strdup(""); ms->parameters.tls_hostname = g_strdup(""); } From 2081475841fe80f2832677b6cc851cbe79ce44d6 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 18 Jul 2017 11:39:03 +0800 Subject: [PATCH 09/16] migration: export capabilities to props Do the same thing to migration capabilities, just like what we did in previous patch for migration parameters. Reviewed-by: Juan Quintela Reviewed-by: Eduardo Habkost Signed-off-by: Peter Xu Message-Id: <1500349150-13240-4-git-send-email-peterx@redhat.com> Signed-off-by: Juan Quintela --- migration/migration.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index f5ed3333db..16f5d9a715 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2002,6 +2002,9 @@ void migration_global_dump(Monitor *mon) ms->send_configuration, ms->send_section_footer); } +#define DEFINE_PROP_MIG_CAP(name, x) \ + DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) + static Property migration_properties[] = { DEFINE_PROP_BOOL("store-global-state", MigrationState, store_global_state, true), @@ -2035,6 +2038,20 @@ static Property migration_properties[] = { DEFINE_PROP_INT64("x-checkpoint-delay", MigrationState, parameters.x_checkpoint_delay, DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), + + /* Migration capabilities */ + DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), + DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), + DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), + DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), + DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), + DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), + DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), + DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), + DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), + DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), + DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), + DEFINE_PROP_END_OF_LIST(), }; From 16d063bc2a8baeb5466bc5b0fa9b0dbb59b3b08b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 18 Jul 2017 11:39:04 +0800 Subject: [PATCH 10/16] migration: introduce migrate_params_check() Helper to check the parameters. Abstracted from qmp_migrate_set_parameters(). Reviewed-by: Juan Quintela Signed-off-by: Peter Xu Message-Id: <1500349150-13240-5-git-send-email-peterx@redhat.com> Signed-off-by: Juan Quintela --- migration/migration.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 16f5d9a715..02d6987b5c 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -644,64 +644,86 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, } } -void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp) +/* + * Check whether the parameters are valid. Error will be put into errp + * (if provided). Return true if valid, otherwise false. + */ +static bool migrate_params_check(MigrationParameters *params, Error **errp) { - MigrationState *s = migrate_get_current(); - if (params->has_compress_level && (params->compress_level < 0 || params->compress_level > 9)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", "is invalid, it should be in the range of 0 to 9"); - return; + return false; } + if (params->has_compress_threads && (params->compress_threads < 1 || params->compress_threads > 255)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_threads", "is invalid, it should be in the range of 1 to 255"); - return; + return false; } + if (params->has_decompress_threads && (params->decompress_threads < 1 || params->decompress_threads > 255)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "decompress_threads", "is invalid, it should be in the range of 1 to 255"); - return; + return false; } + if (params->has_cpu_throttle_initial && (params->cpu_throttle_initial < 1 || params->cpu_throttle_initial > 99)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu_throttle_initial", "an integer in the range of 1 to 99"); - return; + return false; } + if (params->has_cpu_throttle_increment && (params->cpu_throttle_increment < 1 || params->cpu_throttle_increment > 99)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu_throttle_increment", "an integer in the range of 1 to 99"); - return; + return false; } + if (params->has_max_bandwidth && (params->max_bandwidth < 0 || params->max_bandwidth > SIZE_MAX)) { error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the" " range of 0 to %zu bytes/second", SIZE_MAX); - return; + return false; } + if (params->has_downtime_limit && (params->downtime_limit < 0 || params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { error_setg(errp, "Parameter 'downtime_limit' expects an integer in " "the range of 0 to %d milliseconds", MAX_MIGRATE_DOWNTIME); - return; + return false; } + if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "x_checkpoint_delay", "is invalid, it should be positive"); + return false; + } + + return true; +} + +void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp) +{ + MigrationState *s = migrate_get_current(); + + if (!migrate_params_check(params, errp)) { + /* Invalid parameter */ + return; } if (params->has_compress_level) { From 476c72aa918d583a5c9c105bf764f0bec650fbc9 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 18 Jul 2017 11:39:05 +0800 Subject: [PATCH 11/16] migration: provide migrate_params_apply() Abstracted from qmp_migrate_set_parameters(). Reviewed-by: Juan Quintela Signed-off-by: Peter Xu Message-Id: <1500349150-13240-6-git-send-email-peterx@redhat.com> Signed-off-by: Juan Quintela --- migration/migration.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 02d6987b5c..13d9f007f2 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -717,38 +717,40 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) return true; } -void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp) +static void migrate_params_apply(MigrationParameters *params) { MigrationState *s = migrate_get_current(); - if (!migrate_params_check(params, errp)) { - /* Invalid parameter */ - return; - } - if (params->has_compress_level) { s->parameters.compress_level = params->compress_level; } + if (params->has_compress_threads) { s->parameters.compress_threads = params->compress_threads; } + if (params->has_decompress_threads) { s->parameters.decompress_threads = params->decompress_threads; } + if (params->has_cpu_throttle_initial) { s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; } + if (params->has_cpu_throttle_increment) { s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; } + if (params->has_tls_creds) { g_free(s->parameters.tls_creds); s->parameters.tls_creds = g_strdup(params->tls_creds); } + if (params->has_tls_hostname) { g_free(s->parameters.tls_hostname); s->parameters.tls_hostname = g_strdup(params->tls_hostname); } + if (params->has_max_bandwidth) { s->parameters.max_bandwidth = params->max_bandwidth; if (s->to_dst_file) { @@ -756,6 +758,7 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp) s->parameters.max_bandwidth / XFER_LIMIT_RATIO); } } + if (params->has_downtime_limit) { s->parameters.downtime_limit = params->downtime_limit; } @@ -766,11 +769,22 @@ void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp) colo_checkpoint_notify(s); } } + if (params->has_block_incremental) { s->parameters.block_incremental = params->block_incremental; } } +void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp) +{ + if (!migrate_params_check(params, errp)) { + /* Invalid parameter */ + return; + } + + migrate_params_apply(params); +} + void qmp_migrate_start_postcopy(Error **errp) { From 8b0b29dcec59730e6b21b253a6b43271cb3d831b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 18 Jul 2017 11:39:06 +0800 Subject: [PATCH 12/16] migration: check global params for validity Adding validity check for the migration parameters passed in via global properties. Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Message-Id: <1500349150-13240-7-git-send-email-peterx@redhat.com> Signed-off-by: Juan Quintela --- migration/migration.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 13d9f007f2..c59dad9948 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -102,14 +102,22 @@ enum mig_rp_message_type { static MigrationState *current_migration; +static bool migration_object_check(MigrationState *ms, Error **errp); + void migration_object_init(void) { MachineState *ms = MACHINE(qdev_get_machine()); + Error *err = NULL; /* This can only be called once. */ assert(!current_migration); current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION)); + if (!migration_object_check(current_migration, &err)) { + error_report_err(err); + exit(1); + } + /* * We cannot really do this in migration_instance_init() since at * that time global properties are not yet applied, then this @@ -2102,12 +2110,38 @@ static void migration_class_init(ObjectClass *klass, void *data) static void migration_instance_init(Object *obj) { MigrationState *ms = MIGRATION_OBJ(obj); + MigrationParameters *params = &ms->parameters; ms->state = MIGRATION_STATUS_NONE; ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE; ms->mbps = -1; - ms->parameters.tls_creds = g_strdup(""); - ms->parameters.tls_hostname = g_strdup(""); + + params->tls_hostname = g_strdup(""); + params->tls_creds = g_strdup(""); + + /* Set has_* up only for parameter checks */ + params->has_compress_level = true; + params->has_compress_threads = true; + params->has_decompress_threads = true; + params->has_cpu_throttle_initial = true; + params->has_cpu_throttle_increment = true; + params->has_max_bandwidth = true; + params->has_downtime_limit = true; + params->has_x_checkpoint_delay = true; + params->has_block_incremental = true; +} + +/* + * Return true if check pass, false otherwise. Error will be put + * inside errp if provided. + */ +static bool migration_object_check(MigrationState *ms, Error **errp) +{ + if (!migrate_params_check(&ms->parameters, errp)) { + return false; + } + + return true; } static const TypeInfo migration_type = { From fd198f9002a9e1f070c82b04d3229c18d9a49471 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 18 Jul 2017 11:39:07 +0800 Subject: [PATCH 13/16] migration: remove check against colo support Since commit a15215f3 ("build: remove --enable-colo/--disable-colo"), colo is always supported. We don't need any colo_supported() now since it is always true. Removing any extra code that depends on it. CC: Paolo Bonzini CC: Hailiang Zhang Reviewed-by: Juan Quintela Reviewed-by: Zhang Chen Signed-off-by: Peter Xu Message-Id: <1500349150-13240-8-git-send-email-peterx@redhat.com> Signed-off-by: Juan Quintela --- include/migration/colo.h | 1 - migration/colo.c | 5 ----- migration/migration.c | 11 ----------- 3 files changed, 17 deletions(-) diff --git a/include/migration/colo.h b/include/migration/colo.h index be6beba301..ff9874ea16 100644 --- a/include/migration/colo.h +++ b/include/migration/colo.h @@ -15,7 +15,6 @@ #include "qemu-common.h" -bool colo_supported(void); void colo_info_init(void); void migrate_start_colo_process(MigrationState *s); diff --git a/migration/colo.c b/migration/colo.c index ef35f00c9a..a4255432ac 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -29,11 +29,6 @@ static bool vmstate_loading; #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) -bool colo_supported(void) -{ - return true; -} - bool migration_in_colo_state(void) { MigrationState *s = migrate_get_current(); diff --git a/migration/migration.c b/migration/migration.c index c59dad9948..915c8cc11f 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -412,9 +412,6 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) continue; } #endif - if (i == MIGRATION_CAPABILITY_X_COLO && !colo_supported()) { - continue; - } if (head == NULL) { head = g_malloc0(sizeof(*caps)); caps = head; @@ -613,14 +610,6 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, continue; } #endif - if (cap->value->capability == MIGRATION_CAPABILITY_X_COLO) { - if (!colo_supported()) { - error_setg(errp, "COLO is not currently supported, please" - " configure with --enable-colo option in order to" - " support COLO feature"); - continue; - } - } s->enabled_capabilities[cap->value->capability] = cap->value->state; } From 4a84214ebe1695405f58e5c6272d63d6084edfa5 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 18 Jul 2017 11:39:08 +0800 Subject: [PATCH 14/16] migration: provide migrate_caps_check() Abstract helper function to check migration capabilities (from the old qmp_migrate_set_capabilities). Prepare to be used somewhere else. There is side effect on the change: when applying the capabilities, we were skipping the invalid ones, but still applying the valid ones (if they are provided in the same QMP request). After this refactoring, we'll ignore all the capabilities if we detected invalid setup along the way. However, I don't think it is a problem since general users should not provide anything invalid after all. Reviewed-by: Juan Quintela Signed-off-by: Peter Xu Message-Id: <1500349150-13240-9-git-send-email-peterx@redhat.com> Signed-off-by: Juan Quintela --- migration/migration.c | 81 ++++++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 915c8cc11f..74099b361f 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -588,43 +588,49 @@ MigrationInfo *qmp_query_migrate(Error **errp) return info; } -void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - Error **errp) +/** + * @migration_caps_check - check capability validity + * + * @cap_list: old capability list, array of bool + * @params: new capabilities to be applied soon + * @errp: set *errp if the check failed, with reason + * + * Returns true if check passed, otherwise false. + */ +static bool migrate_caps_check(bool *cap_list, + MigrationCapabilityStatusList *params, + Error **errp) { - MigrationState *s = migrate_get_current(); MigrationCapabilityStatusList *cap; - bool old_postcopy_cap = migrate_postcopy_ram(); + bool old_postcopy_cap; - if (migration_is_setup_or_active(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return; - } + old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; for (cap = params; cap; cap = cap->next) { -#ifndef CONFIG_LIVE_BLOCK_MIGRATION - if (cap->value->capability == MIGRATION_CAPABILITY_BLOCK - && cap->value->state) { - error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " - "block migration"); - error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); - continue; - } -#endif - s->enabled_capabilities[cap->value->capability] = cap->value->state; + cap_list[cap->value->capability] = cap->value->state; } - if (migrate_postcopy_ram()) { - if (migrate_use_compression()) { +#ifndef CONFIG_LIVE_BLOCK_MIGRATION + if (cap_list[MIGRATION_CAPABILITY_BLOCK]) { + error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " + "block migration"); + error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); + return false; + } +#endif + + if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { + if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { /* The decompression threads asynchronously write into RAM * rather than use the atomic copies needed to avoid * userfaulting. It should be possible to fix the decompression * threads for compatibility in future. */ - error_report("Postcopy is not currently compatible with " - "compression"); - s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] = - false; + error_setg(errp, "Postcopy is not currently compatible " + "with compression"); + return false; } + /* This check is reasonably expensive, so only when it's being * set the first time, also it's only the destination that needs * special support. @@ -634,11 +640,32 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, /* postcopy_ram_supported_by_host will have emitted a more * detailed message */ - error_report("Postcopy is not supported"); - s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM] = - false; + error_setg(errp, "Postcopy is not supported"); + return false; } } + + return true; +} + +void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + Error **errp) +{ + MigrationState *s = migrate_get_current(); + MigrationCapabilityStatusList *cap; + + if (migration_is_setup_or_active(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return; + } + + if (!migrate_caps_check(s->enabled_capabilities, params, errp)) { + return; + } + + for (cap = params; cap; cap = cap->next) { + s->enabled_capabilities[cap->value->capability] = cap->value->state; + } } /* From 4e4a3d3aa691aee305f07abaf7f0a125baa585c6 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 18 Jul 2017 11:39:09 +0800 Subject: [PATCH 15/16] migration: provide migrate_cap_add() Abstracted from migrate_set_block_enabled() to allocate MigrationCapabilityStatusList properly. Reviewed-by: Juan Quintela Signed-off-by: Peter Xu Message-Id: <1500349150-13240-10-git-send-email-peterx@redhat.com> Signed-off-by: Juan Quintela --- migration/migration.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 74099b361f..0ebdde18ed 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -842,14 +842,27 @@ void migrate_set_state(int *state, int old_state, int new_state) } } -void migrate_set_block_enabled(bool value, Error **errp) +static MigrationCapabilityStatusList *migrate_cap_add( + MigrationCapabilityStatusList *list, + MigrationCapability index, + bool state) { MigrationCapabilityStatusList *cap; cap = g_new0(MigrationCapabilityStatusList, 1); cap->value = g_new0(MigrationCapabilityStatus, 1); - cap->value->capability = MIGRATION_CAPABILITY_BLOCK; - cap->value->state = value; + cap->value->capability = index; + cap->value->state = state; + cap->next = list; + + return cap; +} + +void migrate_set_block_enabled(bool value, Error **errp) +{ + MigrationCapabilityStatusList *cap; + + cap = migrate_cap_add(NULL, MIGRATION_CAPABILITY_BLOCK, value); qmp_migrate_set_capabilities(cap, errp); qapi_free_MigrationCapabilityStatusList(cap); } From 6b19a7d91c8de9904c67b87203a46e55db4181ab Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 18 Jul 2017 11:39:10 +0800 Subject: [PATCH 16/16] migration: check global caps for validity Checks validity for all the capabilities that we enabled with command line. Signed-off-by: Peter Xu Reviewed-by: Juan Quintela Message-Id: <1500349150-13240-11-git-send-email-peterx@redhat.com> Signed-off-by: Juan Quintela --- migration/migration.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 0ebdde18ed..76153914d1 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2166,11 +2166,27 @@ static void migration_instance_init(Object *obj) */ static bool migration_object_check(MigrationState *ms, Error **errp) { + MigrationCapabilityStatusList *head = NULL; + /* Assuming all off */ + bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; + int i; + if (!migrate_params_check(&ms->parameters, errp)) { return false; } - return true; + for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { + if (ms->enabled_capabilities[i]) { + head = migrate_cap_add(head, i, true); + } + } + + ret = migrate_caps_check(cap_list, head, errp); + + /* It works with head == NULL */ + qapi_free_MigrationCapabilityStatusList(head); + + return ret; } static const TypeInfo migration_type = {