Migration Pull request (3rd try)

Hi
 
 This should fix all the freebsd problems.
 
 Please apply,
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEGJn/jt6/WMzuA0uC9IfvGFhy1yMFAmFuj9MACgkQ9IfvGFhy
 1yOmphAAxXZ4yLJP9TjhPnjngzENClk9JdRy7eH6Z3wAvx8wXGYY9gBFSxqybdsY
 9WClDARPBcKFgUo6WtoSd3uolT67QaMMH/m8lggJ3D/J8DiIQrF999f57a/SKsEf
 y/PoiaWdPy23KtAD+G/HXYWVraH6ub5OHhRveObb0EzepsramcT55Soa1JGiUyb0
 O3DONlKOfBaVc27VpWtKbw5epoa9sxqfnvo1qdv2iXn0aEtRa05X4pMhaI1FaBcP
 z913Ez5fbejLyS719lawlzDXdJgDf8SGMqr4CUYXZyzKvf1iz9YFxpQHR8Q/h8oH
 Pck4HsMoPXtecvFguLCsUXkXk2PpSfClvOtsDRVpP1RvA/CxamFkTSrIgHxpWzan
 MWbaaTa32UWwFMvOEARRNd1obGTgOyBue7lm68wbKdmQDYqRCbiENmV1FDhIIj27
 JK2bv3QQr9Y7a3ohMcPG4bGEvMWSMj+jnZr1cRFkL4yJO8qTyRrBn0M0H5ANm1Ni
 Jj1bx6Q4QXAeWEdZD9jMHIB+6TU75arElPeCAlcnyNLmQ/ejP9mQIoraIn79RUCJ
 borVhpyPMtwA5BKoYajvfiFz6oSc4mvFLNEXKYJtiQpmbXdBfNoj40hxCSEJxgtc
 xm2nFN4d2i0SRcbJsCzT7ogrWYgUnZ7ppvPM93AKHQQvgSdUfOw=
 =ue55
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/juanquintela/tags/migration.next-pull-request' into staging

Migration Pull request (3rd try)

Hi

This should fix all the freebsd problems.

Please apply,

# gpg: Signature made Tue 19 Oct 2021 02:28:51 AM PDT
# gpg:                using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg:                 aka "Juan Quintela <quintela@trasno.org>" [full]

* remotes/juanquintela/tags/migration.next-pull-request:
  migration/rdma: advise prefetch write for ODP region
  migration/rdma: Try to register On-Demand Paging memory region
  migration: allow enabling mutilfd for specific protocol only
  migration: allow multifd for socket protocol only
  migration/ram: Don't passs RAMState to migration_clear_memory_region_dirty_bitmap_*()
  multifd: Unconditionally unregister yank function
  multifd: Implement yank for multifd send side

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2021-10-19 07:41:04 -07:00
commit 50352cce13
7 changed files with 151 additions and 34 deletions

View File

@ -1530,6 +1530,12 @@ config_host_data.set('HAVE_COPY_FILE_RANGE', cc.has_function('copy_file_range'))
config_host_data.set('HAVE_OPENPTY', cc.has_function('openpty', dependencies: util))
config_host_data.set('HAVE_STRCHRNUL', cc.has_function('strchrnul'))
config_host_data.set('HAVE_SYSTEM_FUNCTION', cc.has_function('system', prefix: '#include <stdlib.h>'))
# Only probe for ibv_advise_mr() when the rdma dependency was found; the
# result is recorded in the host config as HAVE_IBV_ADVISE_MR.
if rdma.found()
config_host_data.set('HAVE_IBV_ADVISE_MR',
cc.has_function('ibv_advise_mr',
args: config_host['RDMA_LIBS'].split(),
prefix: '#include <infiniband/verbs.h>'))
endif
# has_header_symbol
config_host_data.set('CONFIG_BYTESWAP_H',

View File

@ -453,10 +453,12 @@ static void qemu_start_incoming_migration(const char *uri, Error **errp)
{
const char *p = NULL;
migrate_protocol_allow_multifd(false); /* reset it anyway */
qapi_event_send_migration(MIGRATION_STATUS_SETUP);
if (strstart(uri, "tcp:", &p) ||
strstart(uri, "unix:", NULL) ||
strstart(uri, "vsock:", NULL)) {
migrate_protocol_allow_multifd(true);
socket_start_incoming_migration(p ? p : uri, errp);
#ifdef CONFIG_RDMA
} else if (strstart(uri, "rdma:", &p)) {
@ -1235,6 +1237,14 @@ static bool migrate_caps_check(bool *cap_list,
}
}
/* incoming side only */
if (runstate_check(RUN_STATE_INMIGRATE) &&
!migrate_multifd_is_allowed() &&
cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
error_setg(errp, "multifd is not supported by current protocol");
return false;
}
return true;
}
@ -2280,9 +2290,11 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
}
}
migrate_protocol_allow_multifd(false);
if (strstart(uri, "tcp:", &p) ||
strstart(uri, "unix:", NULL) ||
strstart(uri, "vsock:", NULL)) {
migrate_protocol_allow_multifd(true);
socket_start_outgoing_migration(s, p ? p : uri, &local_err);
#ifdef CONFIG_RDMA
} else if (strstart(uri, "rdma:", &p)) {

View File

@ -531,7 +531,7 @@ void multifd_save_cleanup(void)
{
int i;
if (!migrate_use_multifd()) {
if (!migrate_use_multifd() || !migrate_multifd_is_allowed()) {
return;
}
multifd_send_terminate_threads(NULL);
@ -546,6 +546,9 @@ void multifd_save_cleanup(void)
MultiFDSendParams *p = &multifd_send_state->params[i];
Error *local_err = NULL;
if (p->registered_yank) {
migration_ioc_unregister_yank(p->c);
}
socket_send_channel_destroy(p->c);
p->c = NULL;
qemu_mutex_destroy(&p->mutex);
@ -813,7 +816,8 @@ static bool multifd_channel_connect(MultiFDSendParams *p,
return false;
}
} else {
/* update for tls qio channel */
migration_ioc_register_yank(ioc);
p->registered_yank = true;
p->c = ioc;
qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
QEMU_THREAD_JOINABLE);
@ -864,6 +868,17 @@ cleanup:
multifd_new_send_channel_cleanup(p, sioc, local_err);
}
/*
 * Flag telling whether multifd may be used with the current migration
 * protocol.  It starts out as true and is only ever changed through
 * migrate_protocol_allow_multifd().
 */
static bool migrate_allow_multifd = true;

/* Store @allow as the new "multifd is permitted" state. */
void migrate_protocol_allow_multifd(bool allow)
{
    migrate_allow_multifd = allow;
}

/* Return the state last stored by migrate_protocol_allow_multifd(). */
bool migrate_multifd_is_allowed(void)
{
    return migrate_allow_multifd;
}
int multifd_save_setup(Error **errp)
{
int thread_count;
@ -874,6 +889,11 @@ int multifd_save_setup(Error **errp)
if (!migrate_use_multifd()) {
return 0;
}
if (!migrate_multifd_is_allowed()) {
error_setg(errp, "multifd is not supported by current protocol");
return -1;
}
s = migrate_get_current();
thread_count = migrate_multifd_channels();
multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
@ -967,7 +987,7 @@ int multifd_load_cleanup(Error **errp)
{
int i;
if (!migrate_use_multifd()) {
if (!migrate_use_multifd() || !migrate_multifd_is_allowed()) {
return 0;
}
multifd_recv_terminate_threads(NULL);
@ -987,10 +1007,7 @@ int multifd_load_cleanup(Error **errp)
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDRecvParams *p = &multifd_recv_state->params[i];
if (OBJECT(p->c)->ref == 1) {
migration_ioc_unregister_yank(p->c);
}
migration_ioc_unregister_yank(p->c);
object_unref(OBJECT(p->c));
p->c = NULL;
qemu_mutex_destroy(&p->mutex);
@ -1119,6 +1136,10 @@ int multifd_load_setup(Error **errp)
if (!migrate_use_multifd()) {
return 0;
}
if (!migrate_multifd_is_allowed()) {
error_setg(errp, "multifd is not supported by current protocol");
return -1;
}
thread_count = migrate_multifd_channels();
multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);

View File

@ -13,6 +13,8 @@
#ifndef QEMU_MIGRATION_MULTIFD_H
#define QEMU_MIGRATION_MULTIFD_H
bool migrate_multifd_is_allowed(void);
void migrate_protocol_allow_multifd(bool allow);
int multifd_save_setup(Error **errp);
void multifd_save_cleanup(void);
int multifd_load_setup(Error **errp);
@ -85,6 +87,8 @@ typedef struct {
bool running;
/* should this thread finish */
bool quit;
/* is the yank function registered */
bool registered_yank;
/* thread has work to do */
int pending_job;
/* array of pages to send */

View File

@ -789,8 +789,7 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
return find_next_bit(bitmap, size, start);
}
static void migration_clear_memory_region_dirty_bitmap(RAMState *rs,
RAMBlock *rb,
static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb,
unsigned long page)
{
uint8_t shift;
@ -818,8 +817,7 @@ static void migration_clear_memory_region_dirty_bitmap(RAMState *rs,
}
static void
migration_clear_memory_region_dirty_bitmap_range(RAMState *rs,
RAMBlock *rb,
migration_clear_memory_region_dirty_bitmap_range(RAMBlock *rb,
unsigned long start,
unsigned long npages)
{
@ -832,7 +830,7 @@ migration_clear_memory_region_dirty_bitmap_range(RAMState *rs,
* exclusive.
*/
for (i = chunk_start; i < chunk_end; i += chunk_pages) {
migration_clear_memory_region_dirty_bitmap(rs, rb, i);
migration_clear_memory_region_dirty_bitmap(rb, i);
}
}
@ -850,7 +848,7 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
* the page in the chunk we clear the remote dirty bitmap for all.
* Clearing it earlier won't be a problem, but too late will.
*/
migration_clear_memory_region_dirty_bitmap(rs, rb, page);
migration_clear_memory_region_dirty_bitmap(rb, page);
ret = test_and_clear_bit(page, rb->bmap);
if (ret) {
@ -2777,8 +2775,7 @@ void qemu_guest_free_page_hint(void *addr, size_t len)
* are initially set. Otherwise those skipped pages will be sent in
* the next round after syncing from the memory region bitmap.
*/
migration_clear_memory_region_dirty_bitmap_range(ram_state, block,
start, npages);
migration_clear_memory_region_dirty_bitmap_range(block, start, npages);
ram_state->migration_dirty_pages -=
bitmap_count_one_with_offset(block->bmap, start, npages);
bitmap_clear(block->bmap, start, npages);

View File

@ -1117,19 +1117,82 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
return 0;
}
/*
 * Check whether On-Demand Paging is supported by the RDMA device.
 *
 * Returns true only when ibv_query_device_ex() succeeds for @dev and the
 * general ODP capabilities it reports include IBV_ODP_SUPPORT; returns
 * false in every other case, including a failed query.
 */
static bool rdma_support_odp(struct ibv_context *dev)
{
    struct ibv_device_attr_ex attr = {0};

    if (ibv_query_device_ex(dev, NULL, &attr) != 0) {
        return false;
    }

    return (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) != 0;
}
/*
 * ibv_advise_mr to avoid RNR NAK error as far as possible.
 * The responder mr registering with ODP will send RNR NAK back to
 * the requester in the face of the page fault.
 *
 * @pd:   protection domain handed to ibv_advise_mr()
 * @addr: start address placed in the single scatter/gather entry
 * @len:  length placed in the scatter/gather entry
 * @lkey: local key placed in the scatter/gather entry
 * @name: block name, used only in the trace output
 * @wr:   true selects IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE, false selects
 *        IBV_ADVISE_MR_ADVICE_PREFETCH
 *
 * This is best effort: the outcome is traced and otherwise ignored.  When
 * HAVE_IBV_ADVISE_MR is not defined the function body is empty.
 */
static void qemu_rdma_advise_prefetch_mr(struct ibv_pd *pd, uint64_t addr,
                                         uint32_t len, uint32_t lkey,
                                         const char *name, bool wr)
{
#ifdef HAVE_IBV_ADVISE_MR
    int ret;
    int advice = wr ? IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE :
                      IBV_ADVISE_MR_ADVICE_PREFETCH;
    struct ibv_sge sg_list = {.lkey = lkey, .addr = addr, .length = len};

    ret = ibv_advise_mr(pd, advice,
                        IBV_ADVISE_MR_FLAG_FLUSH, &sg_list, 1);
    /*
     * Ignore the error, only trace it.  ibv_advise_mr() reports failure
     * through its return value (an errno code), so decode that value
     * rather than the global errno, which may be stale.
     */
    if (ret) {
        trace_qemu_rdma_advise_mr(name, len, addr, strerror(ret));
    } else {
        trace_qemu_rdma_advise_mr(name, len, addr, "successed");
    }
#endif
}
static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
{
int i;
RDMALocalBlocks *local = &rdma->local_ram_blocks;
for (i = 0; i < local->nb_blocks; i++) {
int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
local->block[i].mr =
ibv_reg_mr(rdma->pd,
local->block[i].local_host_addr,
local->block[i].length,
IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_WRITE
local->block[i].length, access
);
if (!local->block[i].mr &&
errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
access |= IBV_ACCESS_ON_DEMAND;
/* register ODP mr */
local->block[i].mr =
ibv_reg_mr(rdma->pd,
local->block[i].local_host_addr,
local->block[i].length, access);
trace_qemu_rdma_register_odp_mr(local->block[i].block_name);
if (local->block[i].mr) {
qemu_rdma_advise_prefetch_mr(rdma->pd,
(uintptr_t)local->block[i].local_host_addr,
local->block[i].length,
local->block[i].mr->lkey,
local->block[i].block_name,
true);
}
}
if (!local->block[i].mr) {
perror("Failed to register local dest ram block!");
break;
@ -1215,28 +1278,40 @@ static int qemu_rdma_register_and_get_keys(RDMAContext *rdma,
*/
if (!block->pmr[chunk]) {
uint64_t len = chunk_end - chunk_start;
int access = rkey ? IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE :
0;
trace_qemu_rdma_register_and_get_keys(len, chunk_start);
block->pmr[chunk] = ibv_reg_mr(rdma->pd,
chunk_start, len,
(rkey ? (IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_WRITE) : 0));
block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
if (!block->pmr[chunk] &&
errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
access |= IBV_ACCESS_ON_DEMAND;
/* register ODP mr */
block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
trace_qemu_rdma_register_odp_mr(block->block_name);
if (!block->pmr[chunk]) {
perror("Failed to register chunk!");
fprintf(stderr, "Chunk details: block: %d chunk index %d"
" start %" PRIuPTR " end %" PRIuPTR
" host %" PRIuPTR
" local %" PRIuPTR " registrations: %d\n",
block->index, chunk, (uintptr_t)chunk_start,
(uintptr_t)chunk_end, host_addr,
(uintptr_t)block->local_host_addr,
rdma->total_registrations);
return -1;
if (block->pmr[chunk]) {
qemu_rdma_advise_prefetch_mr(rdma->pd, (uintptr_t)chunk_start,
len, block->pmr[chunk]->lkey,
block->block_name, rkey);
}
}
rdma->total_registrations++;
}
if (!block->pmr[chunk]) {
perror("Failed to register chunk!");
fprintf(stderr, "Chunk details: block: %d chunk index %d"
" start %" PRIuPTR " end %" PRIuPTR
" host %" PRIuPTR
" local %" PRIuPTR " registrations: %d\n",
block->index, chunk, (uintptr_t)chunk_start,
(uintptr_t)chunk_end, host_addr,
(uintptr_t)block->local_host_addr,
rdma->total_registrations);
return -1;
}
rdma->total_registrations++;
if (lkey) {
*lkey = block->pmr[chunk]->lkey;

View File

@ -212,6 +212,8 @@ qemu_rdma_poll_write(const char *compstr, int64_t comp, int left, uint64_t block
qemu_rdma_poll_other(const char *compstr, int64_t comp, int left) "other completion %s (%" PRId64 ") received left %d"
qemu_rdma_post_send_control(const char *desc) "CONTROL: sending %s.."
qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering %" PRIu64 " bytes @ %p"
qemu_rdma_register_odp_mr(const char *name) "Try to register On-Demand Paging memory region: %s"
qemu_rdma_advise_mr(const char *name, uint32_t len, uint64_t addr, const char *res) "Try to advise block %s prefetch at %" PRIu32 "@0x%" PRIx64 ": %s"
qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64
qemu_rdma_registration_handle_finished(void) ""
qemu_rdma_registration_handle_ram_blocks(void) ""