From 71d3612401b614bc64a00fafa8dd930a5672b782 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 9 Jun 2023 00:49:08 +0200 Subject: [PATCH 01/13] migration-test: Create kvm_opts So arch_dirty_ring option becomes one option like the others. Reviewed-by: Peter Xu Message-ID: <20230608224943.3877-8-quintela@redhat.com> Signed-off-by: Juan Quintela --- tests/qtest/migration-test.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 1b43df5ca7..bde553730e 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -710,6 +710,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, g_autofree char *bootpath = NULL; g_autofree char *shmem_opts = NULL; g_autofree char *shmem_path = NULL; + const char *kvm_opts = NULL; const char *arch = qtest_get_arch(); const char *memory_size; @@ -785,13 +786,16 @@ static int test_migrate_start(QTestState **from, QTestState **to, shmem_opts = g_strdup(""); } + if (args->use_dirty_ring) { + kvm_opts = ",dirty-ring-size=4096"; + } + cmd_source = g_strdup_printf("-accel kvm%s -accel tcg " "-name source,debug-threads=on " "-m %s " "-serial file:%s/src_serial " "%s %s %s %s %s", - args->use_dirty_ring ? - ",dirty-ring-size=4096" : "", + kvm_opts ? kvm_opts : "", memory_size, tmpfs, arch_opts ? arch_opts : "", arch_source ? arch_source : "", @@ -811,8 +815,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, "-serial file:%s/dest_serial " "-incoming %s " "%s %s %s %s %s", - args->use_dirty_ring ? - ",dirty-ring-size=4096" : "", + kvm_opts ? kvm_opts : "", memory_size, tmpfs, uri, arch_opts ? arch_opts : "", arch_target ? arch_target : "", From 877cec63d77058c27230b33643508dfeb84d8021 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 9 Jun 2023 00:49:09 +0200 Subject: [PATCH 02/13] migration-test: bootpath is the same for all tests and for all archs So just make it a global variable. Reviewed-by: Peter Xu Message-ID: <20230608224943.3877-9-quintela@redhat.com> Signed-off-by: Juan Quintela --- tests/qtest/migration-test.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index bde553730e..e191f66e54 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -116,6 +116,7 @@ static bool ufd_version_check(void) #endif static char *tmpfs; +static char *bootpath; /* The boot file modifies memory area in [start_address, end_address) * repeatedly. It outputs a 'B' at a fixed rate while it's still running. @@ -124,7 +125,7 @@ static char *tmpfs; #include "tests/migration/aarch64/a-b-kernel.h" #include "tests/migration/s390x/a-b-bios.h" -static void init_bootfile(const char *bootpath, void *content, size_t len) +static void init_bootfile(void *content, size_t len) { FILE *bootfile = fopen(bootpath, "wb"); @@ -707,7 +708,6 @@ static int test_migrate_start(QTestState **from, QTestState **to, g_autofree gchar *cmd_source = NULL; g_autofree gchar *cmd_target = NULL; const gchar *ignore_stderr; - g_autofree char *bootpath = NULL; g_autofree char *shmem_opts = NULL; g_autofree char *shmem_path = NULL; const char *kvm_opts = NULL; @@ -723,17 +723,16 @@ static int test_migrate_start(QTestState **from, QTestState **to, got_src_stop = false; got_dst_resume = false; - bootpath = g_strdup_printf("%s/bootsect", tmpfs); if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { /* the assembled x86 boot sector should be exactly one sector large */ assert(sizeof(x86_bootsect) == 512); - init_bootfile(bootpath, x86_bootsect, sizeof(x86_bootsect)); + init_bootfile(x86_bootsect, sizeof(x86_bootsect)); memory_size = "150M"; arch_opts = g_strdup_printf("-drive file=%s,format=raw", bootpath); start_address = X86_TEST_MEM_START; end_address = X86_TEST_MEM_END; } else if (g_str_equal(arch, "s390x")) { - init_bootfile(bootpath, s390x_elf, sizeof(s390x_elf)); + init_bootfile(s390x_elf, sizeof(s390x_elf)); memory_size = "128M"; arch_opts = g_strdup_printf("-bios %s", bootpath); start_address = S390_TEST_MEM_START; @@ -748,7 +747,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, "until'", end_address, start_address); arch_opts = g_strdup("-nodefaults -machine vsmt=8"); } else if (strcmp(arch, "aarch64") == 0) { - init_bootfile(bootpath, aarch64_kernel, sizeof(aarch64_kernel)); + init_bootfile(aarch64_kernel, sizeof(aarch64_kernel)); memory_size = "150M"; arch_opts = g_strdup_printf("-machine virt,gic-version=max -cpu max " "-kernel %s", bootpath); @@ -866,7 +865,6 @@ static void test_migrate_end(QTestState *from, QTestState *to, bool test_dest) qtest_quit(to); - cleanup("bootsect"); cleanup("migsocket"); cleanup("src_serial"); cleanup("dest_serial"); @@ -2629,12 +2627,10 @@ static QTestState *dirtylimit_start_vm(void) QTestState *vm = NULL; g_autofree gchar *cmd = NULL; const char *arch = qtest_get_arch(); - g_autofree char *bootpath = NULL; assert((strcmp(arch, "x86_64") == 0)); - bootpath = g_strdup_printf("%s/bootsect", tmpfs); assert(sizeof(x86_bootsect) == 512); - init_bootfile(bootpath, x86_bootsect, sizeof(x86_bootsect)); + init_bootfile(x86_bootsect, sizeof(x86_bootsect)); cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 " "-name dirtylimit-test,debug-threads=on " @@ -2650,7 +2646,6 @@ static QTestState *dirtylimit_start_vm(void) static void dirtylimit_stop_vm(QTestState *vm) { qtest_quit(vm); - cleanup("bootsect"); cleanup("vm_serial"); } @@ -2812,6 +2807,7 @@ int main(int argc, char **argv) g_get_tmp_dir(), err->message); } g_assert(tmpfs); + bootpath = g_strdup_printf("%s/bootsect", tmpfs); module_call_init(MODULE_INIT_QOM); @@ -2959,6 +2955,8 @@ int main(int argc, char **argv) g_assert_cmpint(ret, ==, 0); + cleanup("bootsect"); + g_free(bootpath); ret = rmdir(tmpfs); if (ret != 0) { g_test_message("unable to rmdir: path (%s): %s", From 0c690d3e2a3eb73a6c27afb66ec87534c1259bae Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 9 Jun 2023 00:49:10 +0200 Subject: [PATCH 03/13] migration-test: Add bootfile_create/delete() functions The bootsector code is read only from the guest (otherwise we are going to have problems with it being read from both source and destination). Create a single copy for all the tests. Reviewed-by: Peter Xu Message-ID: <20230608224943.3877-10-quintela@redhat.com> Signed-off-by: Juan Quintela --- tests/qtest/migration-test.c | 50 ++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index e191f66e54..f601249391 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -125,14 +125,47 @@ static char *bootpath; #include "tests/migration/aarch64/a-b-kernel.h" #include "tests/migration/s390x/a-b-bios.h" -static void init_bootfile(void *content, size_t len) +static void bootfile_create(char *dir) { + const char *arch = qtest_get_arch(); + unsigned char *content; + size_t len; + + bootpath = g_strdup_printf("%s/bootsect", dir); + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + /* the assembled x86 boot sector should be exactly one sector large */ + g_assert(sizeof(x86_bootsect) == 512); + content = x86_bootsect; + len = sizeof(x86_bootsect); + } else if (g_str_equal(arch, "s390x")) { + content = s390x_elf; + len = sizeof(s390x_elf); + } else if (strcmp(arch, "ppc64") == 0) { + /* + * sane architectures can be programmed at the boot prompt + */ + return; + } else if (strcmp(arch, "aarch64") == 0) { + content = aarch64_kernel; + len = sizeof(aarch64_kernel); + g_assert(sizeof(aarch64_kernel) <= ARM_TEST_MAX_KERNEL_SIZE); + } else { + g_assert_not_reached(); + } + FILE *bootfile = fopen(bootpath, "wb"); g_assert_cmpint(fwrite(content, len, 1, bootfile), ==, 1); fclose(bootfile); } +static void bootfile_delete(void) +{ + unlink(bootpath); + g_free(bootpath); + bootpath = NULL; +} + /* * Wait for some output in the serial output file, * we get an 'A' followed by an endless string of 'B's @@ -724,15 +757,11 @@ static int test_migrate_start(QTestState **from, QTestState **to, got_src_stop = false; got_dst_resume = false; if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { - /* the assembled x86 boot sector should be exactly one sector large */ - assert(sizeof(x86_bootsect) == 512); - init_bootfile(x86_bootsect, sizeof(x86_bootsect)); memory_size = "150M"; arch_opts = g_strdup_printf("-drive file=%s,format=raw", bootpath); start_address = X86_TEST_MEM_START; end_address = X86_TEST_MEM_END; } else if (g_str_equal(arch, "s390x")) { - init_bootfile(s390x_elf, sizeof(s390x_elf)); memory_size = "128M"; arch_opts = g_strdup_printf("-bios %s", bootpath); start_address = S390_TEST_MEM_START; @@ -747,14 +776,11 @@ static int test_migrate_start(QTestState **from, QTestState **to, "until'", end_address, start_address); arch_opts = g_strdup("-nodefaults -machine vsmt=8"); } else if (strcmp(arch, "aarch64") == 0) { - init_bootfile(aarch64_kernel, sizeof(aarch64_kernel)); memory_size = "150M"; arch_opts = g_strdup_printf("-machine virt,gic-version=max -cpu max " "-kernel %s", bootpath); start_address = ARM_TEST_MEM_START; end_address = ARM_TEST_MEM_END; - - g_assert(sizeof(aarch64_kernel) <= ARM_TEST_MAX_KERNEL_SIZE); } else { g_assert_not_reached(); } @@ -2629,9 +2655,6 @@ static QTestState *dirtylimit_start_vm(void) const char *arch = qtest_get_arch(); assert((strcmp(arch, "x86_64") == 0)); - assert(sizeof(x86_bootsect) == 512); - init_bootfile(x86_bootsect, sizeof(x86_bootsect)); - cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 " "-name dirtylimit-test,debug-threads=on " "-m 150M -smp 1 " @@ -2807,7 +2830,7 @@ int main(int argc, char **argv) g_get_tmp_dir(), err->message); } g_assert(tmpfs); - bootpath = g_strdup_printf("%s/bootsect", tmpfs); + bootfile_create(tmpfs); module_call_init(MODULE_INIT_QOM); @@ -2955,8 +2978,7 @@ int main(int argc, char **argv) g_assert_cmpint(ret, ==, 0); - cleanup("bootsect"); - g_free(bootpath); + bootfile_delete(); ret = rmdir(tmpfs); if (ret != 0) { g_test_message("unable to rmdir: path (%s): %s", From 22d3c6e16c69ea581eef6f7ff8ebb2e63107d3f5 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 9 Jun 2023 00:49:11 +0200 Subject: [PATCH 04/13] migration-test: dirtylimit checks for x86_64 arch before So no need to assert we are in x86_64. Once there, refactor the function to remove useless variables. Reviewed-by: Peter Xu Message-ID: <20230608224943.3877-11-quintela@redhat.com> Signed-off-by: Juan Quintela --- tests/qtest/migration-test.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index f601249391..334648ae19 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -2651,10 +2651,7 @@ static int64_t get_limit_rate(QTestState *who) static QTestState *dirtylimit_start_vm(void) { QTestState *vm = NULL; - g_autofree gchar *cmd = NULL; - const char *arch = qtest_get_arch(); - - assert((strcmp(arch, "x86_64") == 0)); + g_autofree gchar * cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 " "-name dirtylimit-test,debug-threads=on " "-m 150M -smp 1 " From 0368ace8f9eb24c5959466db352e4c4afc734954 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 9 Jun 2023 00:49:04 +0200 Subject: [PATCH 05/13] migration-test: simplify shmem_opts handling Reviewed-by: Peter Xu Message-ID: <20230608224943.3877-4-quintela@redhat.com> Signed-off-by: Juan Quintela --- tests/qtest/migration-test.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 334648ae19..46f1c275a2 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -806,9 +806,6 @@ static int test_migrate_start(QTestState **from, QTestState **to, "-object memory-backend-file,id=mem0,size=%s" ",mem-path=%s,share=on -numa node,memdev=mem0", memory_size, shmem_path); - } else { - shmem_path = NULL; - shmem_opts = g_strdup(""); } if (args->use_dirty_ring) { @@ -824,7 +821,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, memory_size, tmpfs, arch_opts ? arch_opts : "", arch_source ? arch_source : "", - shmem_opts, + shmem_opts ? shmem_opts : "", args->opts_source ? args->opts_source : "", ignore_stderr); if (!args->only_target) { @@ -844,7 +841,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, memory_size, tmpfs, uri, arch_opts ? arch_opts : "", arch_target ? arch_target : "", - shmem_opts, + shmem_opts ? shmem_opts : "", args->opts_target ? args->opts_target : "", ignore_stderr); *to = qtest_init(cmd_target); From f4e1b613362e51e205081a60b94f157c16acdca3 Mon Sep 17 00:00:00 2001 From: Tejus GK Date: Wed, 21 Jun 2023 13:09:40 +0000 Subject: [PATCH 06/13] migration: Refactor repeated call of yank_unregister_instance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the function qmp_migrate(), yank_unregister_instance() gets called twice which isn't required. Hence, refactoring it so that it gets called during the local_error cleanup. Reviewed-by: Daniel P. Berrangé Reviewed-by: Juan Quintela Acked-by: Peter Xu Signed-off-by: Tejus GK Message-ID: <20230621130940.178659-3-tejus.gk@nutanix.com> Signed-off-by: Juan Quintela --- migration/migration.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index e2ed85b5be..6d3cf5d5cd 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1703,15 +1703,11 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, } else if (strstart(uri, "fd:", &p)) { fd_start_outgoing_migration(s, p, &local_err); } else { - if (!resume_requested) { - yank_unregister_instance(MIGRATION_YANK_INSTANCE); - } error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE, "uri", "a valid migration protocol"); migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); block_cleanup_parameters(); - return; } if (local_err) { From f16ecfa9f9c147168630422a6f4a4c0eddfbe574 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Tue, 30 May 2023 20:39:24 +0200 Subject: [PATCH 07/13] migration: Use qemu_file_transferred_noflush() for block migration. We only care about the amount of bytes transferred. Flushing is done by the system somewhere else. Reviewed-by: Fabiano Rosas Signed-off-by: Juan Quintela Message-ID: <20230530183941.7223-4-quintela@redhat.com> Signed-off-by: Juan Quintela --- migration/block.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/migration/block.c b/migration/block.c index 86c2256a2b..26c10c0a0c 100644 --- a/migration/block.c +++ b/migration/block.c @@ -755,7 +755,7 @@ static int block_save_setup(QEMUFile *f, void *opaque) static int block_save_iterate(QEMUFile *f, void *opaque) { int ret; - uint64_t last_bytes = qemu_file_transferred(f); + uint64_t last_bytes = qemu_file_transferred_noflush(f); trace_migration_block_save("iterate", block_mig_state.submitted, block_mig_state.transferred); @@ -807,7 +807,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque) } qemu_put_be64(f, BLK_MIG_FLAG_EOS); - uint64_t delta_bytes = qemu_file_transferred(f) - last_bytes; + uint64_t delta_bytes = qemu_file_transferred_noflush(f) - last_bytes; return (delta_bytes > 0); } From 67c31c9c1af1bb8f7df8275cc8731629e2690f89 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 15 May 2023 21:57:03 +0200 Subject: [PATCH 08/13] migration: Don't abuse qemu_file transferred for RDMA Just create a variable for it, the same way that multifd does. This way it is safe to use for other thread, etc, etc. Reviewed-by: Leonardo Bras Signed-off-by: Juan Quintela Message-Id: <20230515195709.63843-11-quintela@redhat.com> --- migration/migration-stats.c | 5 +++-- migration/migration-stats.h | 4 ++++ migration/rdma.c | 22 ++++++++++++++++++++-- migration/trace-events | 2 +- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/migration/migration-stats.c b/migration/migration-stats.c index 095d6d75bb..84e11e6dd8 100644 --- a/migration/migration-stats.c +++ b/migration/migration-stats.c @@ -61,8 +61,9 @@ void migration_rate_reset(QEMUFile *f) uint64_t migration_transferred_bytes(QEMUFile *f) { uint64_t multifd = stat64_get(&mig_stats.multifd_bytes); + uint64_t rdma = stat64_get(&mig_stats.rdma_bytes); uint64_t qemu_file = qemu_file_transferred(f); - trace_migration_transferred_bytes(qemu_file, multifd); - return qemu_file + multifd; + trace_migration_transferred_bytes(qemu_file, multifd, rdma); + return qemu_file + multifd + rdma; } diff --git a/migration/migration-stats.h b/migration/migration-stats.h index ac2260e987..2358caad63 100644 --- a/migration/migration-stats.h +++ b/migration/migration-stats.h @@ -89,6 +89,10 @@ typedef struct { * Maximum amount of data we can send in a cycle. */ Stat64 rate_limit_max; + /* + * Number of bytes sent through RDMA. + */ + Stat64 rdma_bytes; /* * Total number of bytes transferred. */ diff --git a/migration/rdma.c b/migration/rdma.c index a2a3db35b1..2a3c784328 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -2122,9 +2122,18 @@ retry: return -EIO; } + /* + * TODO: Here we are sending something, but we are not + * accounting for anything transferred. The following is wrong: + * + * stat64_add(&mig_stats.rdma_bytes, sge.length); + * + * because we are using some kind of compression. I + * would think that head.len would be the more similar + * thing to a correct value. + */ stat64_add(&mig_stats.zero_pages, sge.length / qemu_target_page_size()); - return 1; } @@ -2232,8 +2241,17 @@ retry: set_bit(chunk, block->transit_bitmap); stat64_add(&mig_stats.normal_pages, sge.length / qemu_target_page_size()); + /* + * We are adding to transferred the amount of data written, but no + * overhead at all. I will asume that RDMA is magicaly and don't + * need to transfer (at least) the addresses where it wants to + * write the pages. Here it looks like it should be something + * like: + * sizeof(send_wr) + sge.length + * but this being RDMA, who knows. + */ + stat64_add(&mig_stats.rdma_bytes, sge.length); ram_transferred_add(sge.length); - qemu_file_credit_transfer(f, sge.length); rdma->total_writes++; return 0; diff --git a/migration/trace-events b/migration/trace-events index 4666f19325..63483732ce 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -191,7 +191,7 @@ process_incoming_migration_co_postcopy_end_main(void) "" postcopy_preempt_enabled(bool value) "%d" # migration-stats -migration_transferred_bytes(uint64_t qemu_file, uint64_t multifd) "qemu_file %" PRIu64 " multifd %" PRIu64 +migration_transferred_bytes(uint64_t qemu_file, uint64_t multifd, uint64_t rdma) "qemu_file %" PRIu64 " multifd %" PRIu64 " RDMA %" PRIu64 # channel.c migration_set_incoming_channel(void *ioc, const char *ioctype) "ioc=%p ioctype=%s" From 19df4f3226c0f3e80291a40aec3c9c459dadfdf4 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 15 May 2023 21:57:04 +0200 Subject: [PATCH 09/13] migration/RDMA: It is accounting for zero/normal pages in two places Remove the one in control_save_page(). Reviewed-by: Leonardo Bras Signed-off-by: Juan Quintela Message-Id: <20230515195709.63843-12-quintela@redhat.com> --- migration/ram.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 9040d66e61..f2c5b07919 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1204,13 +1204,6 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, if (ret == RAM_SAVE_CONTROL_DELAYED) { return true; } - - if (bytes_xmit > 0) { - stat64_add(&mig_stats.normal_pages, 1); - } else if (bytes_xmit == 0) { - stat64_add(&mig_stats.zero_pages, 1); - } - return true; } From e33780351ceb8317dccec143e722ae8434d58c34 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 15 May 2023 21:57:05 +0200 Subject: [PATCH 10/13] migration/rdma: Remove QEMUFile parameter when not used Reviewed-by: Leonardo Bras Signed-off-by: Juan Quintela Message-Id: <20230515195709.63843-13-quintela@redhat.com> --- migration/rdma.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/migration/rdma.c b/migration/rdma.c index 2a3c784328..9007261b5c 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -2027,7 +2027,7 @@ static int qemu_rdma_exchange_recv(RDMAContext *rdma, RDMAControlHeader *head, * If we're using dynamic registration on the dest-side, we have to * send a registration command first. */ -static int qemu_rdma_write_one(QEMUFile *f, RDMAContext *rdma, +static int qemu_rdma_write_one(RDMAContext *rdma, int current_index, uint64_t current_addr, uint64_t length) { @@ -2263,7 +2263,7 @@ retry: * We support sending out multiple chunks at the same time. * Not all of them need to get signaled in the completion queue. */ -static int qemu_rdma_write_flush(QEMUFile *f, RDMAContext *rdma) +static int qemu_rdma_write_flush(RDMAContext *rdma) { int ret; @@ -2271,7 +2271,7 @@ static int qemu_rdma_write_flush(QEMUFile *f, RDMAContext *rdma) return 0; } - ret = qemu_rdma_write_one(f, rdma, + ret = qemu_rdma_write_one(rdma, rdma->current_index, rdma->current_addr, rdma->current_length); if (ret < 0) { @@ -2344,7 +2344,7 @@ static inline int qemu_rdma_buffer_mergable(RDMAContext *rdma, * and only require that a batch gets acknowledged in the completion * queue instead of each individual chunk. */ -static int qemu_rdma_write(QEMUFile *f, RDMAContext *rdma, +static int qemu_rdma_write(RDMAContext *rdma, uint64_t block_offset, uint64_t offset, uint64_t len) { @@ -2355,7 +2355,7 @@ static int qemu_rdma_write(QEMUFile *f, RDMAContext *rdma, /* If we cannot merge it, we flush the current buffer first. */ if (!qemu_rdma_buffer_mergable(rdma, current_addr, len)) { - ret = qemu_rdma_write_flush(f, rdma); + ret = qemu_rdma_write_flush(rdma); if (ret) { return ret; } @@ -2377,7 +2377,7 @@ static int qemu_rdma_write(QEMUFile *f, RDMAContext *rdma, /* flush it if buffer is too large */ if (rdma->current_length >= RDMA_MERGE_MAX) { - return qemu_rdma_write_flush(f, rdma); + return qemu_rdma_write_flush(rdma); } return 0; @@ -2798,7 +2798,6 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, Error **errp) { QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); - QEMUFile *f = rioc->file; RDMAContext *rdma; int ret; ssize_t done = 0; @@ -2819,7 +2818,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, * Push out any writes that * we're queued up for VM's ram. */ - ret = qemu_rdma_write_flush(f, rdma); + ret = qemu_rdma_write_flush(rdma); if (ret < 0) { rdma->error_state = ret; error_setg(errp, "qemu_rdma_write_flush returned %d", ret); @@ -2958,11 +2957,11 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, /* * Block until all the outstanding chunks have been delivered by the hardware. */ -static int qemu_rdma_drain_cq(QEMUFile *f, RDMAContext *rdma) +static int qemu_rdma_drain_cq(RDMAContext *rdma) { int ret; - if (qemu_rdma_write_flush(f, rdma) < 0) { + if (qemu_rdma_write_flush(rdma) < 0) { return -EIO; } @@ -3273,7 +3272,7 @@ static size_t qemu_rdma_save_page(QEMUFile *f, * is full, or the page doesn't belong to the current chunk, * an actual RDMA write will occur and a new chunk will be formed. */ - ret = qemu_rdma_write(f, rdma, block_offset, offset, size); + ret = qemu_rdma_write(rdma, block_offset, offset, size); if (ret < 0) { error_report("rdma migration: write error! %d", ret); goto err; @@ -3928,7 +3927,7 @@ static int qemu_rdma_registration_stop(QEMUFile *f, CHECK_ERROR_STATE(); qemu_fflush(f); - ret = qemu_rdma_drain_cq(f, rdma); + ret = qemu_rdma_drain_cq(rdma); if (ret < 0) { goto err; From 2ebe5d4d5aa4d11f02a2d52fa398a52a6a0dc2ee Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 15 May 2023 21:57:06 +0200 Subject: [PATCH 11/13] migration/rdma: Don't use imaginary transfers RDMA protocol is completely asynchronous, so in qemu_rdma_save_page() they "invent" that a byte has been transferred. And then they call qemu_file_credit_transfer() and ram_transferred_add() with that byte. Just remove that calls as nothing has been sent. Reviewed-by: Leonardo Bras Signed-off-by: Juan Quintela Message-Id: <20230515195709.63843-14-quintela@redhat.com> --- migration/qemu-file.c | 5 +---- migration/ram.c | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 19c33c9985..e53ff2dd86 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -332,13 +332,10 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, if (ret != RAM_SAVE_CONTROL_DELAYED && ret != RAM_SAVE_CONTROL_NOT_SUPP) { - if (bytes_sent && *bytes_sent > 0) { - qemu_file_credit_transfer(f, *bytes_sent); - } else if (ret < 0) { + if (ret < 0) { qemu_file_set_error(f, ret); } } - return ret; } diff --git a/migration/ram.c b/migration/ram.c index f2c5b07919..c6238f7a8b 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1197,7 +1197,6 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, } if (bytes_xmit) { - ram_transferred_add(bytes_xmit); *pages = 1; } From 9f51fe92392f601a177687bef01a545298cb47e1 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 15 May 2023 21:57:07 +0200 Subject: [PATCH 12/13] migration: Remove unused qemu_file_credit_transfer() After this change, nothing abuses QEMUFile to account for data transferrefd during migration. Reviewed-by: Leonardo Bras Signed-off-by: Juan Quintela Message-Id: <20230515195709.63843-15-quintela@redhat.com> --- migration/qemu-file.c | 5 ----- migration/qemu-file.h | 8 -------- 2 files changed, 13 deletions(-) diff --git a/migration/qemu-file.c b/migration/qemu-file.c index e53ff2dd86..5c43fa34e7 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -397,11 +397,6 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) return len; } -void qemu_file_credit_transfer(QEMUFile *f, size_t size) -{ - f->total_transferred += size; -} - /** Closes the file * * Returns negative error value if any error happened on previous operations or diff --git a/migration/qemu-file.h b/migration/qemu-file.h index 47015f5201..57b00c8562 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -119,14 +119,6 @@ bool qemu_file_buffer_empty(QEMUFile *file); */ int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset); void qemu_file_skip(QEMUFile *f, int size); -/* - * qemu_file_credit_transfer: - * - * Report on a number of bytes that have been transferred - * out of band from the main file object I/O methods. This - * accounting information tracks the total migration traffic. - */ -void qemu_file_credit_transfer(QEMUFile *f, size_t size); int qemu_file_get_error_obj_any(QEMUFile *f1, QEMUFile *f2, Error **errp); void qemu_file_set_error_obj(QEMUFile *f, int ret, Error *err); void qemu_file_set_error(QEMUFile *f, int ret); From 9c53d369e5903375a2e3358f739be77dcb8dae49 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Mon, 15 May 2023 21:57:08 +0200 Subject: [PATCH 13/13] migration/rdma: Simplify the function that saves a page When we sent a page through QEMUFile hooks (RDMA) there are three posiblities: - We are not using RDMA. return RAM_SAVE_CONTROL_DELAYED and control_save_page() returns false to let anything else to proceed. - There is one error but we are using RDMA. Then we return a negative value, control_save_page() needs to return true. - Everything goes well and RDMA start the sent of the page asynchronously. It returns RAM_SAVE_CONTROL_DELAYED and we need to return 1 for ram_save_page_legacy. Clear? I know, I know, the interface is as bad as it gets. I think that now it is a bit clearer, but this needs to be done some other way. Reviewed-by: Leonardo Bras Signed-off-by: Juan Quintela Message-Id: <20230515195709.63843-16-quintela@redhat.com> --- migration/qemu-file.c | 12 ++++++------ migration/qemu-file.h | 14 ++++++-------- migration/ram.c | 10 +++------- migration/rdma.c | 19 +++---------------- 4 files changed, 18 insertions(+), 37 deletions(-) diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 5c43fa34e7..5e8207dae4 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -322,14 +322,14 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data) } } -size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, - ram_addr_t offset, size_t size, - uint64_t *bytes_sent) +int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, + ram_addr_t offset, size_t size) { if (f->hooks && f->hooks->save_page) { - int ret = f->hooks->save_page(f, block_offset, - offset, size, bytes_sent); - + int ret = f->hooks->save_page(f, block_offset, offset, size); + /* + * RAM_SAVE_CONTROL_* are negative values + */ if (ret != RAM_SAVE_CONTROL_DELAYED && ret != RAM_SAVE_CONTROL_NOT_SUPP) { if (ret < 0) { diff --git a/migration/qemu-file.h b/migration/qemu-file.h index 57b00c8562..03e718c264 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -49,11 +49,10 @@ typedef int (QEMURamHookFunc)(QEMUFile *f, uint64_t flags, void *data); * This function allows override of where the RAM page * is saved (such as RDMA, for example.) */ -typedef size_t (QEMURamSaveFunc)(QEMUFile *f, - ram_addr_t block_offset, - ram_addr_t offset, - size_t size, - uint64_t *bytes_sent); +typedef int (QEMURamSaveFunc)(QEMUFile *f, + ram_addr_t block_offset, + ram_addr_t offset, + size_t size); typedef struct QEMUFileHooks { QEMURamHookFunc *before_ram_iterate; @@ -142,9 +141,8 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data); #define RAM_SAVE_CONTROL_NOT_SUPP -1000 #define RAM_SAVE_CONTROL_DELAYED -2000 -size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, - ram_addr_t offset, size_t size, - uint64_t *bytes_sent); +int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, + ram_addr_t offset, size_t size); QIOChannel *qemu_file_get_ioc(QEMUFile *file); #endif diff --git a/migration/ram.c b/migration/ram.c index c6238f7a8b..99cff591a3 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1186,23 +1186,19 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, ram_addr_t offset, int *pages) { - uint64_t bytes_xmit = 0; int ret; - *pages = -1; ret = ram_control_save_page(pss->pss_channel, block->offset, offset, - TARGET_PAGE_SIZE, &bytes_xmit); + TARGET_PAGE_SIZE); if (ret == RAM_SAVE_CONTROL_NOT_SUPP) { return false; } - if (bytes_xmit) { - *pages = 1; - } - if (ret == RAM_SAVE_CONTROL_DELAYED) { + *pages = 1; return true; } + *pages = ret; return true; } diff --git a/migration/rdma.c b/migration/rdma.c index 9007261b5c..5748c9045b 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -3240,13 +3240,12 @@ qio_channel_rdma_shutdown(QIOChannel *ioc, * * @size : Number of bytes to transfer * - * @bytes_sent : User-specificed pointer to indicate how many bytes were + * @pages_sent : User-specificed pointer to indicate how many pages were * sent. Usually, this will not be more than a few bytes of * the protocol because most transfers are sent asynchronously. */ -static size_t qemu_rdma_save_page(QEMUFile *f, - ram_addr_t block_offset, ram_addr_t offset, - size_t size, uint64_t *bytes_sent) +static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset, + ram_addr_t offset, size_t size) { QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f)); RDMAContext *rdma; @@ -3278,18 +3277,6 @@ static size_t qemu_rdma_save_page(QEMUFile *f, goto err; } - /* - * We always return 1 bytes because the RDMA - * protocol is completely asynchronous. We do not yet know - * whether an identified chunk is zero or not because we're - * waiting for other pages to potentially be merged with - * the current chunk. So, we have to call qemu_update_position() - * later on when the actual write occurs. - */ - if (bytes_sent) { - *bytes_sent = 1; - } - /* * Drain the Completion Queue if possible, but do not block, * just poll.