migration/rdma: Try to register On-Demand Paging memory region

Previously, registering a memory region for an fsdax mem-backend-file
failed with "Operation not supported". In this case, we can try to register
it with On-Demand Paging[1], as rpma_mr_reg() does in rpma[2].

[1]: https://community.mellanox.com/s/article/understanding-on-demand-paging--odp-x
[2]: http://pmem.io/rpma/manpages/v0.9.0/rpma_mr_reg.3

CC: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Reviewed-by: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
This commit is contained in:
Li Zhijian 2021-09-10 15:02:54 +08:00 committed by Juan Quintela
parent 5ad15e8614
commit e2daccb0d0
2 changed files with 54 additions and 20 deletions

View File

@ -1117,19 +1117,47 @@ static int qemu_rdma_alloc_qp(RDMAContext *rdma)
return 0; return 0;
} }
/* Check whether On-Demand Paging is supported by RDAM device */
static bool rdma_support_odp(struct ibv_context *dev)
{
struct ibv_device_attr_ex attr = {0};
int ret = ibv_query_device_ex(dev, NULL, &attr);
if (ret) {
return false;
}
if (attr.odp_caps.general_caps & IBV_ODP_SUPPORT) {
return true;
}
return false;
}
static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma) static int qemu_rdma_reg_whole_ram_blocks(RDMAContext *rdma)
{ {
int i; int i;
RDMALocalBlocks *local = &rdma->local_ram_blocks; RDMALocalBlocks *local = &rdma->local_ram_blocks;
for (i = 0; i < local->nb_blocks; i++) { for (i = 0; i < local->nb_blocks; i++) {
int access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
local->block[i].mr = local->block[i].mr =
ibv_reg_mr(rdma->pd, ibv_reg_mr(rdma->pd,
local->block[i].local_host_addr, local->block[i].local_host_addr,
local->block[i].length, local->block[i].length, access
IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_WRITE
); );
if (!local->block[i].mr &&
errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
access |= IBV_ACCESS_ON_DEMAND;
/* register ODP mr */
local->block[i].mr =
ibv_reg_mr(rdma->pd,
local->block[i].local_host_addr,
local->block[i].length, access);
trace_qemu_rdma_register_odp_mr(local->block[i].block_name);
}
if (!local->block[i].mr) { if (!local->block[i].mr) {
perror("Failed to register local dest ram block!"); perror("Failed to register local dest ram block!");
break; break;
@ -1215,28 +1243,33 @@ static int qemu_rdma_register_and_get_keys(RDMAContext *rdma,
*/ */
if (!block->pmr[chunk]) { if (!block->pmr[chunk]) {
uint64_t len = chunk_end - chunk_start; uint64_t len = chunk_end - chunk_start;
int access = rkey ? IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE :
0;
trace_qemu_rdma_register_and_get_keys(len, chunk_start); trace_qemu_rdma_register_and_get_keys(len, chunk_start);
block->pmr[chunk] = ibv_reg_mr(rdma->pd, block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
chunk_start, len, if (!block->pmr[chunk] &&
(rkey ? (IBV_ACCESS_LOCAL_WRITE | errno == ENOTSUP && rdma_support_odp(rdma->verbs)) {
IBV_ACCESS_REMOTE_WRITE) : 0)); access |= IBV_ACCESS_ON_DEMAND;
/* register ODP mr */
if (!block->pmr[chunk]) { block->pmr[chunk] = ibv_reg_mr(rdma->pd, chunk_start, len, access);
perror("Failed to register chunk!"); trace_qemu_rdma_register_odp_mr(block->block_name);
fprintf(stderr, "Chunk details: block: %d chunk index %d"
" start %" PRIuPTR " end %" PRIuPTR
" host %" PRIuPTR
" local %" PRIuPTR " registrations: %d\n",
block->index, chunk, (uintptr_t)chunk_start,
(uintptr_t)chunk_end, host_addr,
(uintptr_t)block->local_host_addr,
rdma->total_registrations);
return -1;
} }
rdma->total_registrations++;
} }
if (!block->pmr[chunk]) {
perror("Failed to register chunk!");
fprintf(stderr, "Chunk details: block: %d chunk index %d"
" start %" PRIuPTR " end %" PRIuPTR
" host %" PRIuPTR
" local %" PRIuPTR " registrations: %d\n",
block->index, chunk, (uintptr_t)chunk_start,
(uintptr_t)chunk_end, host_addr,
(uintptr_t)block->local_host_addr,
rdma->total_registrations);
return -1;
}
rdma->total_registrations++;
if (lkey) { if (lkey) {
*lkey = block->pmr[chunk]->lkey; *lkey = block->pmr[chunk]->lkey;

View File

@ -212,6 +212,7 @@ qemu_rdma_poll_write(const char *compstr, int64_t comp, int left, uint64_t block
qemu_rdma_poll_other(const char *compstr, int64_t comp, int left) "other completion %s (%" PRId64 ") received left %d" qemu_rdma_poll_other(const char *compstr, int64_t comp, int left) "other completion %s (%" PRId64 ") received left %d"
qemu_rdma_post_send_control(const char *desc) "CONTROL: sending %s.." qemu_rdma_post_send_control(const char *desc) "CONTROL: sending %s.."
qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering %" PRIu64 " bytes @ %p" qemu_rdma_register_and_get_keys(uint64_t len, void *start) "Registering %" PRIu64 " bytes @ %p"
qemu_rdma_register_odp_mr(const char *name) "Try to register On-Demand Paging memory region: %s"
qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64 qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64
qemu_rdma_registration_handle_finished(void) "" qemu_rdma_registration_handle_finished(void) ""
qemu_rdma_registration_handle_ram_blocks(void) "" qemu_rdma_registration_handle_ram_blocks(void) ""