From c02039a6f3730ddcf683a0ba9a175688c6db71a0 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 8 Sep 2020 16:30:17 -0400 Subject: [PATCH 01/26] migration: Properly destroy variables on incoming side In migration_incoming_state_destroy(), we've got a few variables that aren't destroyed properly, namely: main_thread_load_event postcopy_pause_sem_dst postcopy_pause_sem_fault rp_mutex Destroy them properly. Signed-off-by: Peter Xu Message-Id: <20200908203022.341615-2-peterx@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index d9d1e0b190..3495c9e542 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -238,12 +238,15 @@ void migration_incoming_state_destroy(void) mis->postcopy_remote_fds = NULL; } - qemu_event_reset(&mis->main_thread_load_event); - if (mis->socket_address_list) { qapi_free_SocketAddressList(mis->socket_address_list); mis->socket_address_list = NULL; } + + qemu_event_destroy(&mis->main_thread_load_event); + qemu_sem_destroy(&mis->postcopy_pause_sem_dst); + qemu_sem_destroy(&mis->postcopy_pause_sem_fault); + qemu_mutex_destroy(&mis->rp_mutex); } static void migrate_generate_event(int new_state) From 2e2bce167ee7d54a339bbc4208dfb47979ec59b9 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 8 Sep 2020 16:30:18 -0400 Subject: [PATCH 02/26] migration: Rework migrate_send_rp_req_pages() function We duplicated the logic of maintaining the last_rb variable at both callers of this function. Pass *rb pointer into the function so that we can avoid duplicating the logic. Also, when we have the rb pointer, it's also easier to remove the original 2nd & 4th parameters, because both of them (name of the ramblock when needed, or the page size) can be fetched from the ramblock pointer too. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Message-Id: <20200908203022.341615-3-peterx@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 26 ++++++++++++++++++-------- migration/migration.h | 4 ++-- migration/postcopy-ram.c | 24 ++---------------------- 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 3495c9e542..827f8be07d 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -314,25 +314,35 @@ error: return ret; } -/* Request a range of pages from the source VM at the given - * start address. - * rbname: Name of the RAMBlock to request the page in, if NULL it's the same - * as the last request (a name must have been given previously) +/* Request one page from the source VM at the given start address. + * rb: the RAMBlock to request the page in * Start: Address offset within the RB * Len: Length in bytes required - must be a multiple of pagesize */ -int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname, - ram_addr_t start, size_t len) +int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb, + ram_addr_t start) { uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */ size_t msglen = 12; /* start + len */ + size_t len = qemu_ram_pagesize(rb); enum mig_rp_message_type msg_type; + const char *rbname; + int rbname_len; *(uint64_t *)bufc = cpu_to_be64((uint64_t)start); *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len); - if (rbname) { - int rbname_len = strlen(rbname); + /* + * We maintain the last ramblock that we requested for page. Note that we + * don't need locking because this function will only be called within the + * postcopy ram fault thread. + */ + if (rb != mis->last_rb) { + mis->last_rb = rb; + + rbname = qemu_ram_get_idstr(rb); + rbname_len = strlen(rbname); + assert(rbname_len < 256); bufc[msglen++] = rbname_len; diff --git a/migration/migration.h b/migration/migration.h index bdc7450da3..e74042362d 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -326,8 +326,8 @@ void migrate_send_rp_shut(MigrationIncomingState *mis, uint32_t value); void migrate_send_rp_pong(MigrationIncomingState *mis, uint32_t value); -int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname, - ram_addr_t start, size_t len); +int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb, + ram_addr_t start); void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, char *block_name); void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 1654ff11a5..0a2f88a87d 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -684,14 +684,7 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb, qemu_ram_get_idstr(rb), rb_offset); return postcopy_wake_shared(pcfd, client_addr, rb); } - if (rb != mis->last_rb) { - mis->last_rb = rb; - migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb), - aligned_rbo, pagesize); - } else { - /* Save some space */ - migrate_send_rp_req_pages(mis, NULL, aligned_rbo, pagesize); - } + migrate_send_rp_req_pages(mis, rb, aligned_rbo); return 0; } @@ -986,20 +979,7 @@ retry: * Send the request to the source - we want to request one * of our host page sizes (which is >= TPS) */ - if (rb != mis->last_rb) { - mis->last_rb = rb; - ret = migrate_send_rp_req_pages(mis, - qemu_ram_get_idstr(rb), - rb_offset, - qemu_ram_pagesize(rb)); - } else { - /* Save some space */ - ret = migrate_send_rp_req_pages(mis, - NULL, - rb_offset, - qemu_ram_pagesize(rb)); - } - + ret = migrate_send_rp_req_pages(mis, rb, rb_offset); if (ret) { /* May be network failure, try to wait for recovery */ if (ret == -EIO && postcopy_pause_fault_thread(mis)) { From 4240dceeb3362d7589faf2ea88235ca91ae447e4 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:21:56 +0800 Subject: [PATCH 03/26] migration/dirtyrate: setup up query-dirtyrate framwork Add get_dirtyrate_thread() functions to setup query-dirtyrate framework. Signed-off-by: Chuan Zheng Signed-off-by: YanYing Zhuang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: David Edmondson Reviewed-by: Li Qiang Message-Id: <1600237327-33618-2-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 38 ++++++++++++++++++++++++++++++++++++++ migration/dirtyrate.h | 28 ++++++++++++++++++++++++++++ migration/meson.build | 2 +- 3 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 migration/dirtyrate.c create mode 100644 migration/dirtyrate.h diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c new file mode 100644 index 0000000000..bf7fd24e75 --- /dev/null +++ b/migration/dirtyrate.c @@ -0,0 +1,38 @@ +/* + * Dirtyrate implement code + * + * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD. + * + * Authors: + * Chuan Zheng + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "cpu.h" +#include "qemu/config-file.h" +#include "exec/memory.h" +#include "exec/ramblock.h" +#include "exec/target_page.h" +#include "qemu/rcu_queue.h" +#include "qapi/qapi-commands-migration.h" +#include "migration.h" +#include "dirtyrate.h" + +static void calculate_dirtyrate(struct DirtyRateConfig config) +{ + /* todo */ + return; +} + +void *get_dirtyrate_thread(void *arg) +{ + struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; + + calculate_dirtyrate(config); + + return NULL; +} diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h new file mode 100644 index 0000000000..84ab9409ac --- /dev/null +++ b/migration/dirtyrate.h @@ -0,0 +1,28 @@ +/* + * Dirtyrate common functions + * + * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Chuan Zheng + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_MIGRATION_DIRTYRATE_H +#define QEMU_MIGRATION_DIRTYRATE_H + +/* + * Sample 512 pages per GB as default. + * TODO: Make it configurable. + */ +#define DIRTYRATE_DEFAULT_SAMPLE_PAGES 512 + +struct DirtyRateConfig { + uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ + int64_t sample_period_seconds; /* time duration between two sampling */ +}; + +void *get_dirtyrate_thread(void *arg); +#endif diff --git a/migration/meson.build b/migration/meson.build index ac8ff1419f..b5b71c8060 100644 --- a/migration/meson.build +++ b/migration/meson.build @@ -37,4 +37,4 @@ softmmu_ss.add(when: ['CONFIG_RDMA', rdma], if_true: files('rdma.c')) softmmu_ss.add(when: 'CONFIG_LIVE_BLOCK_MIGRATION', if_true: files('block.c')) softmmu_ss.add(when: 'CONFIG_ZSTD', if_true: [files('multifd-zstd.c'), zstd]) -specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: files('ram.c')) +specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: files('dirtyrate.c', 'ram.c')) From 7df3aa30838c010ce9b2f587afdcd6609386f630 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:21:57 +0800 Subject: [PATCH 04/26] migration/dirtyrate: add DirtyRateStatus to denote calculation status add DirtyRateStatus to denote calculating status. Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Li Qiang Message-Id: <1600237327-33618-3-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert atomic name fixup --- migration/dirtyrate.c | 26 ++++++++++++++++++++++++++ qapi/migration.json | 17 +++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index bf7fd24e75..3edf000f45 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -22,6 +22,19 @@ #include "migration.h" #include "dirtyrate.h" +static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; + +static int dirtyrate_set_state(int *state, int old_state, int new_state) +{ + assert(new_state < DIRTY_RATE_STATUS__MAX); + if (qatomic_cmpxchg(state, old_state, new_state) == old_state) { + return 0; + } else { + return -1; + } +} + + static void calculate_dirtyrate(struct DirtyRateConfig config) { /* todo */ @@ -31,8 +44,21 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) void *get_dirtyrate_thread(void *arg) { struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; + int ret; + + ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED, + DIRTY_RATE_STATUS_MEASURING); + if (ret == -1) { + error_report("change dirtyrate state failed."); + return NULL; + } calculate_dirtyrate(config); + ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING, + DIRTY_RATE_STATUS_MEASURED); + if (ret == -1) { + error_report("change dirtyrate state failed."); + } return NULL; } diff --git a/qapi/migration.json b/qapi/migration.json index 675f70bb67..76a59b4f92 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1720,3 +1720,20 @@ ## { 'event': 'UNPLUG_PRIMARY', 'data': { 'device-id': 'str' } } + +## +# @DirtyRateStatus: +# +# An enumeration of dirtyrate status. +# +# @unstarted: the dirtyrate thread has not been started. +# +# @measuring: the dirtyrate thread is measuring. +# +# @measured: the dirtyrate thread has measured and results are available. +# +# Since: 5.2 +# +## +{ 'enum': 'DirtyRateStatus', + 'data': [ 'unstarted', 'measuring', 'measured'] } From a2635f0a7508c3291c6bd2d88ea31b6ae7dfa341 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:21:58 +0800 Subject: [PATCH 05/26] migration/dirtyrate: Add RamblockDirtyInfo to store sampled page info Add RamblockDirtyInfo to store sampled page info of each ramblock. Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: David Edmondson Reviewed-by: Li Qiang Message-Id: <1600237327-33618-4-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h index 84ab9409ac..8707df852d 100644 --- a/migration/dirtyrate.h +++ b/migration/dirtyrate.h @@ -19,10 +19,28 @@ */ #define DIRTYRATE_DEFAULT_SAMPLE_PAGES 512 +/* + * Record ramblock idstr + */ +#define RAMBLOCK_INFO_MAX_LEN 256 + struct DirtyRateConfig { uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ int64_t sample_period_seconds; /* time duration between two sampling */ }; +/* + * Store dirtypage info for each ramblock. + */ +struct RamblockDirtyInfo { + char idstr[RAMBLOCK_INFO_MAX_LEN]; /* idstr for each ramblock */ + uint8_t *ramblock_addr; /* base address of ramblock we measure */ + uint64_t ramblock_pages; /* ramblock size in TARGET_PAGE_SIZE */ + uint64_t *sample_page_vfn; /* relative offset address for sampled page */ + uint64_t sample_pages_count; /* count of sampled pages */ + uint64_t sample_dirty_count; /* count of dirty pages we measure */ + uint32_t *hash_result; /* array of hash result for sampled pages */ +}; + void *get_dirtyrate_thread(void *arg); #endif From c9a58d719ba7b059d53d1b84ff3458be68cce5c8 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:21:59 +0800 Subject: [PATCH 06/26] migration/dirtyrate: Add dirtyrate statistics series functions Add dirtyrate statistics functions to record/update dirtyrate info. Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Li Qiang Message-Id: <1600237327-33618-5-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 32 ++++++++++++++++++++++++++++++++ migration/dirtyrate.h | 12 ++++++++++++ 2 files changed, 44 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 3edf000f45..94c4e173bc 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -23,6 +23,7 @@ #include "dirtyrate.h" static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; +static struct DirtyRateStat DirtyStat; static int dirtyrate_set_state(int *state, int old_state, int new_state) { @@ -34,6 +35,37 @@ static int dirtyrate_set_state(int *state, int old_state, int new_state) } } +static void reset_dirtyrate_stat(void) +{ + DirtyStat.total_dirty_samples = 0; + DirtyStat.total_sample_count = 0; + DirtyStat.total_block_mem_MB = 0; + DirtyStat.dirty_rate = -1; + DirtyStat.start_time = 0; + DirtyStat.calc_time = 0; +} + +static void update_dirtyrate_stat(struct RamblockDirtyInfo *info) +{ + DirtyStat.total_dirty_samples += info->sample_dirty_count; + DirtyStat.total_sample_count += info->sample_pages_count; + /* size of total pages in MB */ + DirtyStat.total_block_mem_MB += (info->ramblock_pages * + TARGET_PAGE_SIZE) >> 20; +} + +static void update_dirtyrate(uint64_t msec) +{ + uint64_t dirtyrate; + uint64_t total_dirty_samples = DirtyStat.total_dirty_samples; + uint64_t total_sample_count = DirtyStat.total_sample_count; + uint64_t total_block_mem_MB = DirtyStat.total_block_mem_MB; + + dirtyrate = total_dirty_samples * total_block_mem_MB * + 1000 / (total_sample_count * msec); + + DirtyStat.dirty_rate = dirtyrate; +} static void calculate_dirtyrate(struct DirtyRateConfig config) { diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h index 8707df852d..312debca6f 100644 --- a/migration/dirtyrate.h +++ b/migration/dirtyrate.h @@ -42,5 +42,17 @@ struct RamblockDirtyInfo { uint32_t *hash_result; /* array of hash result for sampled pages */ }; +/* + * Store calculation statistics for each measure. + */ +struct DirtyRateStat { + uint64_t total_dirty_samples; /* total dirty sampled page */ + uint64_t total_sample_count; /* total sampled pages */ + uint64_t total_block_mem_MB; /* size of total sampled pages in MB */ + int64_t dirty_rate; /* dirty rate in MB/s */ + int64_t start_time; /* calculation start time in units of second */ + int64_t calc_time; /* time duration of two sampling in units of second */ +}; + void *get_dirtyrate_thread(void *arg); #endif From 3ded54b1bd00177bf56d332636ecaf0da9d8b77e Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:00 +0800 Subject: [PATCH 07/26] migration/dirtyrate: move RAMBLOCK_FOREACH_MIGRATABLE into ram.h RAMBLOCK_FOREACH_MIGRATABLE is need in dirtyrate measure, move the existing definition up into migration/ram.h Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: David Edmondson Reviewed-by: Li Qiang Message-Id: <1600237327-33618-6-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 1 + migration/ram.c | 11 +---------- migration/ram.h | 10 ++++++++++ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 94c4e173bc..10315f7a63 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -20,6 +20,7 @@ #include "qemu/rcu_queue.h" #include "qapi/qapi-commands-migration.h" #include "migration.h" +#include "ram.h" #include "dirtyrate.h" static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; diff --git a/migration/ram.c b/migration/ram.c index c5f36aeae5..433489d633 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -158,21 +158,12 @@ out: return ret; } -static bool ramblock_is_ignored(RAMBlock *block) +bool ramblock_is_ignored(RAMBlock *block) { return !qemu_ram_is_migratable(block) || (migrate_ignore_shared() && qemu_ram_is_shared(block)); } -/* Should be holding either ram_list.mutex, or the RCU lock. */ -#define RAMBLOCK_FOREACH_NOT_IGNORED(block) \ - INTERNAL_RAMBLOCK_FOREACH(block) \ - if (ramblock_is_ignored(block)) {} else - -#define RAMBLOCK_FOREACH_MIGRATABLE(block) \ - INTERNAL_RAMBLOCK_FOREACH(block) \ - if (!qemu_ram_is_migratable(block)) {} else - #undef RAMBLOCK_FOREACH int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque) diff --git a/migration/ram.h b/migration/ram.h index 2eeaacfa13..011e85414e 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -37,6 +37,16 @@ extern MigrationStats ram_counters; extern XBZRLECacheStats xbzrle_counters; extern CompressionStats compression_counters; +bool ramblock_is_ignored(RAMBlock *block); +/* Should be holding either ram_list.mutex, or the RCU lock. */ +#define RAMBLOCK_FOREACH_NOT_IGNORED(block) \ + INTERNAL_RAMBLOCK_FOREACH(block) \ + if (ramblock_is_ignored(block)) {} else + +#define RAMBLOCK_FOREACH_MIGRATABLE(block) \ + INTERNAL_RAMBLOCK_FOREACH(block) \ + if (!qemu_ram_is_migratable(block)) {} else + int xbzrle_cache_resize(int64_t new_size, Error **errp); uint64_t ram_bytes_remaining(void); uint64_t ram_bytes_total(void); From ba0e519f953310267998d6af379304bd19c50f60 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:01 +0800 Subject: [PATCH 08/26] migration/dirtyrate: Record hash results for each sampled page Record hash results for each sampled page, crc32 is taken to calculate hash results for each sampled length in TARGET_PAGE_SIZE. Signed-off-by: Chuan Zheng Signed-off-by: YanYing Zhuang Reviewed-by: David Edmondson Reviewed-by: Li Qiang Message-Id: <1600237327-33618-7-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 109 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 10315f7a63..856d26dfe7 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -10,6 +10,7 @@ * See the COPYING file in the top-level directory. */ +#include #include "qemu/osdep.h" #include "qapi/error.h" #include "cpu.h" @@ -68,6 +69,114 @@ static void update_dirtyrate(uint64_t msec) DirtyStat.dirty_rate = dirtyrate; } +/* + * get hash result for the sampled memory with length of TARGET_PAGE_SIZE + * in ramblock, which starts from ramblock base address. + */ +static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info, + uint64_t vfn) +{ + uint32_t crc; + + crc = crc32(0, (info->ramblock_addr + + vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE); + + return crc; +} + +static bool save_ramblock_hash(struct RamblockDirtyInfo *info) +{ + unsigned int sample_pages_count; + int i; + GRand *rand; + + sample_pages_count = info->sample_pages_count; + + /* ramblock size less than one page, return success to skip this ramblock */ + if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) { + return true; + } + + info->hash_result = g_try_malloc0_n(sample_pages_count, + sizeof(uint32_t)); + if (!info->hash_result) { + return false; + } + + info->sample_page_vfn = g_try_malloc0_n(sample_pages_count, + sizeof(uint64_t)); + if (!info->sample_page_vfn) { + g_free(info->hash_result); + return false; + } + + rand = g_rand_new(); + for (i = 0; i < sample_pages_count; i++) { + info->sample_page_vfn[i] = g_rand_int_range(rand, 0, + info->ramblock_pages - 1); + info->hash_result[i] = get_ramblock_vfn_hash(info, + info->sample_page_vfn[i]); + } + g_rand_free(rand); + + return true; +} + +static void get_ramblock_dirty_info(RAMBlock *block, + struct RamblockDirtyInfo *info, + struct DirtyRateConfig *config) +{ + uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes; + + /* Right shift 30 bits to calc ramblock size in GB */ + info->sample_pages_count = (qemu_ram_get_used_length(block) * + sample_pages_per_gigabytes) >> 30; + /* Right shift TARGET_PAGE_BITS to calc page count */ + info->ramblock_pages = qemu_ram_get_used_length(block) >> + TARGET_PAGE_BITS; + info->ramblock_addr = qemu_ram_get_host_addr(block); + strcpy(info->idstr, qemu_ram_get_idstr(block)); +} + +static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, + struct DirtyRateConfig config, + int *block_count) +{ + struct RamblockDirtyInfo *info = NULL; + struct RamblockDirtyInfo *dinfo = NULL; + RAMBlock *block = NULL; + int total_count = 0; + int index = 0; + bool ret = false; + + RAMBLOCK_FOREACH_MIGRATABLE(block) { + total_count++; + } + + dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo)); + if (dinfo == NULL) { + goto out; + } + + RAMBLOCK_FOREACH_MIGRATABLE(block) { + if (index >= total_count) { + break; + } + info = &dinfo[index]; + get_ramblock_dirty_info(block, info, &config); + if (!save_ramblock_hash(info)) { + goto out; + } + index++; + } + ret = true; + +out: + *block_count = index; + *block_dinfo = dinfo; + return ret; +} + static void calculate_dirtyrate(struct DirtyRateConfig config) { /* todo */ From 9c04387b88a7685ab2b1f77911a56e91d8f67b6d Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:02 +0800 Subject: [PATCH 09/26] migration/dirtyrate: Compare page hash results for recorded sampled page Compare page hash results for recorded sampled page. Signed-off-by: Chuan Zheng Signed-off-by: YanYing Zhuang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Li Qiang Message-Id: <1600237327-33618-8-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 63 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 856d26dfe7..0f78aa3af7 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -177,6 +177,69 @@ out: return ret; } +static void calc_page_dirty_rate(struct RamblockDirtyInfo *info) +{ + uint32_t crc; + int i; + + for (i = 0; i < info->sample_pages_count; i++) { + crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]); + if (crc != info->hash_result[i]) { + info->sample_dirty_count++; + } + } +} + +static struct RamblockDirtyInfo * +find_block_matched(RAMBlock *block, int count, + struct RamblockDirtyInfo *infos) +{ + int i; + struct RamblockDirtyInfo *matched; + + for (i = 0; i < count; i++) { + if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) { + break; + } + } + + if (i == count) { + return NULL; + } + + if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) || + infos[i].ramblock_pages != + (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) { + return NULL; + } + + matched = &infos[i]; + + return matched; +} + +static bool compare_page_hash_info(struct RamblockDirtyInfo *info, + int block_count) +{ + struct RamblockDirtyInfo *block_dinfo = NULL; + RAMBlock *block = NULL; + + RAMBLOCK_FOREACH_MIGRATABLE(block) { + block_dinfo = find_block_matched(block, block_count, info); + if (block_dinfo == NULL) { + continue; + } + calc_page_dirty_rate(block_dinfo); + update_dirtyrate_stat(block_dinfo); + } + + if (DirtyStat.total_sample_count == 0) { + return false; + } + + return true; +} + static void calculate_dirtyrate(struct DirtyRateConfig config) { /* todo */ From f82583cdc05b89e5900e44ceacb6cc85f974b448 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:03 +0800 Subject: [PATCH 10/26] migration/dirtyrate: skip sampling ramblock with size below MIN_RAMBLOCK_SIZE In order to sample real RAM, skip ramblock with size below MIN_RAMBLOCK_SIZE which is set as 128M. Signed-off-by: Chuan Zheng Reviewed-by: David Edmondson Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Li Qiang Message-Id: <1600237327-33618-9-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 21 +++++++++++++++++++++ migration/dirtyrate.h | 5 +++++ 2 files changed, 26 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 0f78aa3af7..ee47e2bc27 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -138,6 +138,18 @@ static void get_ramblock_dirty_info(RAMBlock *block, strcpy(info->idstr, qemu_ram_get_idstr(block)); } +static bool skip_sample_ramblock(RAMBlock *block) +{ + /* + * Sample only blocks larger than MIN_RAMBLOCK_SIZE. + */ + if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) { + return true; + } + + return false; +} + static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, struct DirtyRateConfig config, int *block_count) @@ -150,6 +162,9 @@ static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, bool ret = false; RAMBLOCK_FOREACH_MIGRATABLE(block) { + if (skip_sample_ramblock(block)) { + continue; + } total_count++; } @@ -159,6 +174,9 @@ static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, } RAMBLOCK_FOREACH_MIGRATABLE(block) { + if (skip_sample_ramblock(block)) { + continue; + } if (index >= total_count) { break; } @@ -225,6 +243,9 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info, RAMBlock *block = NULL; RAMBLOCK_FOREACH_MIGRATABLE(block) { + if (skip_sample_ramblock(block)) { + continue; + } block_dinfo = find_block_matched(block, block_count, info); if (block_dinfo == NULL) { continue; diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h index 312debca6f..be5b8ec2b1 100644 --- a/migration/dirtyrate.h +++ b/migration/dirtyrate.h @@ -24,6 +24,11 @@ */ #define RAMBLOCK_INFO_MAX_LEN 256 +/* + * Minimum RAMBlock size to sample, in megabytes. + */ +#define MIN_RAMBLOCK_SIZE 128 + struct DirtyRateConfig { uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ int64_t sample_period_seconds; /* time duration between two sampling */ From eca582249c4cd3df2b6ff977e2ff481efb4c8517 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:04 +0800 Subject: [PATCH 11/26] migration/dirtyrate: Implement set_sample_page_period() and is_sample_period_valid() Implement is_sample_period_valid() to check if the sample period is vaild and do set_sample_page_period() to sleep specific time between sample actions. Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: David Edmondson Reviewed-by: Li Qiang Message-Id: <1600237327-33618-10-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 24 ++++++++++++++++++++++++ migration/dirtyrate.h | 6 ++++++ 2 files changed, 30 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index ee47e2bc27..20216c5f09 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -27,6 +27,30 @@ static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; static struct DirtyRateStat DirtyStat; +static int64_t set_sample_page_period(int64_t msec, int64_t initial_time) +{ + int64_t current_time; + + current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + if ((current_time - initial_time) >= msec) { + msec = current_time - initial_time; + } else { + g_usleep((msec + initial_time - current_time) * 1000); + } + + return msec; +} + +static bool is_sample_period_valid(int64_t sec) +{ + if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC || + sec > MAX_FETCH_DIRTYRATE_TIME_SEC) { + return false; + } + + return true; +} + static int dirtyrate_set_state(int *state, int old_state, int new_state) { assert(new_state < DIRTY_RATE_STATUS__MAX); diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h index be5b8ec2b1..6ec429534d 100644 --- a/migration/dirtyrate.h +++ b/migration/dirtyrate.h @@ -29,6 +29,12 @@ */ #define MIN_RAMBLOCK_SIZE 128 +/* + * Take 1s as minimum time for calculation duration + */ +#define MIN_FETCH_DIRTYRATE_TIME_SEC 1 +#define MAX_FETCH_DIRTYRATE_TIME_SEC 60 + struct DirtyRateConfig { uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ int64_t sample_period_seconds; /* time duration between two sampling */ From cf0bbb49d8f4d1825d33d8dc8b07dc6edade807b Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:05 +0800 Subject: [PATCH 12/26] migration/dirtyrate: Implement calculate_dirtyrate() function Implement calculate_dirtyrate() function. Signed-off-by: Chuan Zheng Signed-off-by: YanYing Zhuang Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Li Qiang Message-Id: <1600237327-33618-11-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 45 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 20216c5f09..419b0e30ba 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -162,6 +162,21 @@ static void get_ramblock_dirty_info(RAMBlock *block, strcpy(info->idstr, qemu_ram_get_idstr(block)); } +static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count) +{ + int i; + + if (!infos) { + return; + } + + for (i = 0; i < count; i++) { + g_free(infos[i].sample_page_vfn); + g_free(infos[i].hash_result); + } + g_free(infos); +} + static bool skip_sample_ramblock(RAMBlock *block) { /* @@ -287,8 +302,34 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info, static void calculate_dirtyrate(struct DirtyRateConfig config) { - /* todo */ - return; + struct RamblockDirtyInfo *block_dinfo = NULL; + int block_count = 0; + int64_t msec = 0; + int64_t initial_time; + + rcu_register_thread(); + reset_dirtyrate_stat(); + rcu_read_lock(); + initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) { + goto out; + } + rcu_read_unlock(); + + msec = config.sample_period_seconds * 1000; + msec = set_sample_page_period(msec, initial_time); + + rcu_read_lock(); + if (!compare_page_hash_info(block_dinfo, block_count)) { + goto out; + } + + update_dirtyrate(msec); + +out: + rcu_read_unlock(); + free_ramblock_dirty_info(block_dinfo, block_count); + rcu_unregister_thread(); } void *get_dirtyrate_thread(void *arg) From 4c437254b807364df3d36e90be3b86251f405a83 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:06 +0800 Subject: [PATCH 13/26] migration/dirtyrate: Implement qmp_cal_dirty_rate()/qmp_get_dirty_rate() function Implement qmp_cal_dirty_rate()/qmp_get_dirty_rate() function which could be called Signed-off-by: Chuan Zheng Message-Id: <1600237327-33618-12-git-send-email-zhengchuan@huawei.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert atomic function fixup Wording fixup in migration.json based on Eric's review --- migration/dirtyrate.c | 62 +++++++++++++++++++++++++++++++++++++++++++ qapi/migration.json | 50 ++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 419b0e30ba..2ffd88d48e 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -61,6 +61,24 @@ static int dirtyrate_set_state(int *state, int old_state, int new_state) } } +static struct DirtyRateInfo *query_dirty_rate_info(void) +{ + int64_t dirty_rate = DirtyStat.dirty_rate; + struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo)); + + if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) { + info->dirty_rate = dirty_rate; + } else { + info->dirty_rate = -1; + } + + info->status = CalculatingState; + info->start_time = DirtyStat.start_time; + info->calc_time = DirtyStat.calc_time; + + return info; +} + static void reset_dirtyrate_stat(void) { DirtyStat.total_dirty_samples = 0; @@ -318,6 +336,8 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) msec = config.sample_period_seconds * 1000; msec = set_sample_page_period(msec, initial_time); + DirtyStat.start_time = initial_time / 1000; + DirtyStat.calc_time = msec / 1000; rcu_read_lock(); if (!compare_page_hash_info(block_dinfo, block_count)) { @@ -353,3 +373,45 @@ void *get_dirtyrate_thread(void *arg) } return NULL; } + +void qmp_calc_dirty_rate(int64_t calc_time, Error **errp) +{ + static struct DirtyRateConfig config; + QemuThread thread; + int ret; + + /* + * If the dirty rate is already being measured, don't attempt to start. + */ + if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) { + error_setg(errp, "the dirty rate is already being measured."); + return; + } + + if (!is_sample_period_valid(calc_time)) { + error_setg(errp, "calc-time is out of range[%d, %d].", + MIN_FETCH_DIRTYRATE_TIME_SEC, + MAX_FETCH_DIRTYRATE_TIME_SEC); + return; + } + + /* + * Init calculation state as unstarted. + */ + ret = dirtyrate_set_state(&CalculatingState, CalculatingState, + DIRTY_RATE_STATUS_UNSTARTED); + if (ret == -1) { + error_setg(errp, "init dirty rate calculation state failed."); + return; + } + + config.sample_period_seconds = calc_time; + config.sample_pages_per_gigabytes = DIRTYRATE_DEFAULT_SAMPLE_PAGES; + qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread, + (void *)&config, QEMU_THREAD_DETACHED); +} + +struct DirtyRateInfo *qmp_query_dirty_rate(Error **errp) +{ + return query_dirty_rate_info(); +} diff --git a/qapi/migration.json b/qapi/migration.json index 76a59b4f92..ce2216cfea 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -1737,3 +1737,53 @@ ## { 'enum': 'DirtyRateStatus', 'data': [ 'unstarted', 'measuring', 'measured'] } + +## +# @DirtyRateInfo: +# +# Information about current dirty page rate of vm. +# +# @dirty-rate: @dirtyrate describing the dirty page rate of vm +# in units of MB/s. +# If this field returns '-1', it means querying has not +# yet started or completed. +# +# @status: status containing dirtyrate query status includes +# 'unstarted' or 'measuring' or 'measured' +# +# @start-time: start time in units of second for calculation +# +# @calc-time: time in units of second for sample dirty pages +# +# Since: 5.2 +# +## +{ 'struct': 'DirtyRateInfo', + 'data': {'dirty-rate': 'int64', + 'status': 'DirtyRateStatus', + 'start-time': 'int64', + 'calc-time': 'int64'} } + +## +# @calc-dirty-rate: +# +# start calculating dirty page rate for vm +# +# @calc-time: time in units of second for sample dirty pages +# +# Since: 5.2 +# +# Example: +# {"command": "calc-dirty-rate", "data": {"calc-time": 1} } +# +## +{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} } + +## +# @query-dirty-rate: +# +# query dirty page rate in units of MB/s for vm +# +# Since: 5.2 +## +{ 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' } From 3c0b5dffc1768871960f3d904085e462b9e18a1f Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Wed, 16 Sep 2020 14:22:07 +0800 Subject: [PATCH 14/26] migration/dirtyrate: Add trace_calls to make it easier to debug Add trace_calls to make it easier to debug Signed-off-by: Chuan Zheng Reviewed-by: Dr. David Alan Gilbert Reviewed-by: David Edmondson Message-Id: <1600237327-33618-13-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/dirtyrate.c | 9 +++++++++ migration/trace-events | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 2ffd88d48e..68577ef250 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -22,6 +22,7 @@ #include "qapi/qapi-commands-migration.h" #include "migration.h" #include "ram.h" +#include "trace.h" #include "dirtyrate.h" static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; @@ -54,6 +55,7 @@ static bool is_sample_period_valid(int64_t sec) static int dirtyrate_set_state(int *state, int old_state, int new_state) { assert(new_state < DIRTY_RATE_STATUS__MAX); + trace_dirtyrate_set_state(DirtyRateStatus_str(new_state)); if (qatomic_cmpxchg(state, old_state, new_state) == old_state) { return 0; } else { @@ -76,6 +78,8 @@ static struct DirtyRateInfo *query_dirty_rate_info(void) info->start_time = DirtyStat.start_time; info->calc_time = DirtyStat.calc_time; + trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState)); + return info; } @@ -123,6 +127,7 @@ static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info, crc = crc32(0, (info->ramblock_addr + vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE); + trace_get_ramblock_vfn_hash(info->idstr, vfn, crc); return crc; } @@ -201,6 +206,8 @@ static bool skip_sample_ramblock(RAMBlock *block) * Sample only blocks larger than MIN_RAMBLOCK_SIZE. */ if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) { + trace_skip_sample_ramblock(block->idstr, + qemu_ram_get_used_length(block)); return true; } @@ -260,6 +267,7 @@ static void calc_page_dirty_rate(struct RamblockDirtyInfo *info) for (i = 0; i < info->sample_pages_count; i++) { crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]); if (crc != info->hash_result[i]) { + trace_calc_page_dirty_rate(info->idstr, crc, info->hash_result[i]); info->sample_dirty_count++; } } @@ -285,6 +293,7 @@ find_block_matched(RAMBlock *block, int count, if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) || infos[i].ramblock_pages != (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) { + trace_find_page_matched(block->idstr); return NULL; } diff --git a/migration/trace-events b/migration/trace-events index 7ba2fa6644..597a470b9d 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -313,3 +313,11 @@ dirty_bitmap_load_bits_zeroes(void) "" dirty_bitmap_load_header(uint32_t flags) "flags 0x%x" dirty_bitmap_load_enter(void) "" dirty_bitmap_load_success(void) "" + +# dirtyrate.c +dirtyrate_set_state(const char *new_state) "new state %s" +query_dirty_rate_info(const char *new_state) "current state %s" +get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t crc) "ramblock name: %s, vfn: %"PRIu64 ", crc: %" PRIu32 +calc_page_dirty_rate(const char *idstr, uint32_t new_crc, uint32_t old_crc) "ramblock name: %s, new crc: %" PRIu32 ", old crc: %" PRIu32 +skip_sample_ramblock(const char *idstr, uint64_t ramblock_size) "ramblock name: %s, ramblock size: %" PRIu64 +find_page_matched(const char *idstr) "ramblock %s addr or size changed" From b4deb9bf8d97dbee07781e90e2089c6410ea9098 Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Mon, 21 Sep 2020 09:48:30 +0000 Subject: [PATCH 15/26] migration: Truncate state file in xen-save-devices-state When running the xen-save-devices-state QMP command, if the filename already exists it will be truncated before dumping the devices' state into it. Signed-off-by: Dov Murik Message-Id: <20200921094830.114028-1-dovmurik@linux.vnet.ibm.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/savevm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/migration/savevm.c b/migration/savevm.c index ee21e981ba..34e4b71052 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2803,7 +2803,8 @@ void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live, vm_stop(RUN_STATE_SAVE_VM); global_state_store_running(); - ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT, 0660, errp); + ioc = qio_channel_file_new_path(filename, O_WRONLY | O_CREAT | O_TRUNC, + 0660, errp); if (!ioc) { goto the_end; } From 7590a2ae091fde8bb72d5df93977ab9707e23242 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Mon, 21 Sep 2020 16:49:57 +0200 Subject: [PATCH 16/26] migration: increase max-bandwidth to 128 MiB/s (1 Gib/s) max-bandwidth is set by default to 32 MiB/s (256 Mib/s) since 2008 (5bb7910af031c). Most of the CPUs can dirty memory faster than that now, and this is clearly a problem with POWER where the page size is 64 kiB and not 4 KiB. Signed-off-by: Laurent Vivier Message-Id: <20200921144957.979989-1-lvivier@redhat.com> Reviewed-by: David Gibson Reviewed-by: Greg Kurz Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 827f8be07d..de34c995af 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -57,7 +57,7 @@ #include "qemu/queue.h" #include "multifd.h" -#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ +#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ /* Amount of time to allocate to each "chunk" of bandwidth-throttled * data. */ From d8053e73fb2d295279b5cc4de7dc06bd581241ca Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Tue, 15 Sep 2020 11:03:57 +0800 Subject: [PATCH 17/26] migration/tls: save hostname into MigrationState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hostname is need in multifd-tls, save hostname into MigrationState. Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Message-Id: <1600139042-104593-2-git-send-email-zhengchuan@huawei.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- migration/channel.c | 1 + migration/migration.c | 1 + migration/migration.h | 5 +++++ migration/tls.c | 2 ++ 4 files changed, 9 insertions(+) diff --git a/migration/channel.c b/migration/channel.c index 20e4c8e2dc..8a783baa0b 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -90,5 +90,6 @@ void migration_channel_connect(MigrationState *s, } } migrate_fd_connect(s, error); + g_free(s->hostname); error_free(error); } diff --git a/migration/migration.c b/migration/migration.c index de34c995af..838ca79f57 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1896,6 +1896,7 @@ void migrate_init(MigrationState *s) s->migration_thread_running = false; error_free(s->error); s->error = NULL; + s->hostname = NULL; migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); diff --git a/migration/migration.h b/migration/migration.h index e74042362d..deb411aaad 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -259,6 +259,11 @@ struct MigrationState * (which is in 4M chunk). */ uint8_t clear_bitmap_shift; + + /* + * This save hostname when out-going migration starts + */ + char *hostname; }; void migrate_set_state(int *state, int old_state, int new_state); diff --git a/migration/tls.c b/migration/tls.c index 7a02ec8656..8fbf9ac796 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -154,6 +154,8 @@ void migration_tls_channel_connect(MigrationState *s, return; } + /* Save hostname into MigrationState for handshake */ + s->hostname = g_strdup(hostname); trace_migration_tls_outgoing_handshake_start(hostname); qio_channel_set_name(QIO_CHANNEL(tioc), "migration-tls-outgoing"); qio_channel_tls_handshake(tioc, From bfb790e7b2bbbc19686cf6bc3b03742b8361ac82 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Tue, 15 Sep 2020 11:03:58 +0800 Subject: [PATCH 18/26] migration/tls: extract migration_tls_client_create for common-use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit migration_tls_client_create will be used in multifd-tls, let's extract it. Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Reviewed-by: Daniel P. Berrangé Message-Id: <1600139042-104593-3-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/tls.c | 26 ++++++++++++++++++-------- migration/tls.h | 6 ++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/migration/tls.c b/migration/tls.c index 8fbf9ac796..66c6f43221 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -22,7 +22,6 @@ #include "channel.h" #include "migration.h" #include "tls.h" -#include "io/channel-tls.h" #include "crypto/tlscreds.h" #include "qemu/error-report.h" #include "qapi/error.h" @@ -125,11 +124,10 @@ static void migration_tls_outgoing_handshake(QIOTask *task, object_unref(OBJECT(ioc)); } - -void migration_tls_channel_connect(MigrationState *s, - QIOChannel *ioc, - const char *hostname, - Error **errp) +QIOChannelTLS *migration_tls_client_create(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error **errp) { QCryptoTLSCreds *creds; QIOChannelTLS *tioc; @@ -137,7 +135,7 @@ void migration_tls_channel_connect(MigrationState *s, creds = migration_tls_get_creds( s, QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT, errp); if (!creds) { - return; + return NULL; } if (s->parameters.tls_hostname && *s->parameters.tls_hostname) { @@ -145,11 +143,23 @@ void migration_tls_channel_connect(MigrationState *s, } if (!hostname) { error_setg(errp, "No hostname available for TLS"); - return; + return NULL; } tioc = qio_channel_tls_new_client( ioc, creds, hostname, errp); + + return tioc; +} + +void migration_tls_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error **errp) +{ + QIOChannelTLS *tioc; + + tioc = migration_tls_client_create(s, ioc, hostname, errp); if (!tioc) { return; } diff --git a/migration/tls.h b/migration/tls.h index cdd70001ed..0cfbe368ba 100644 --- a/migration/tls.h +++ b/migration/tls.h @@ -22,11 +22,17 @@ #define QEMU_MIGRATION_TLS_H #include "io/channel.h" +#include "io/channel-tls.h" void migration_tls_channel_process_incoming(MigrationState *s, QIOChannel *ioc, Error **errp); +QIOChannelTLS *migration_tls_client_create(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error **errp); + void migration_tls_channel_connect(MigrationState *s, QIOChannel *ioc, const char *hostname, From 8e5fa059328643affe1cdb269f3f5ae1a52fe7ac Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Tue, 15 Sep 2020 11:03:59 +0800 Subject: [PATCH 19/26] migration/tls: add tls_hostname into MultiFDSendParams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since multifd creation is async with migration_channel_connect, we should pass the hostname from MigrationState to MultiFDSendParams. Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Message-Id: <1600139042-104593-4-git-send-email-zhengchuan@huawei.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- migration/multifd.c | 5 +++++ migration/multifd.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/migration/multifd.c b/migration/multifd.c index fd57378db8..de34276c43 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -548,6 +548,8 @@ void multifd_save_cleanup(void) qemu_sem_destroy(&p->sem_sync); g_free(p->name); p->name = NULL; + g_free(p->tls_hostname); + p->tls_hostname = NULL; multifd_pages_clear(p->pages); p->pages = NULL; p->packet_len = 0; @@ -751,10 +753,12 @@ int multifd_save_setup(Error **errp) int thread_count; uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); uint8_t i; + MigrationState *s; if (!migrate_use_multifd()) { return 0; } + s = migrate_get_current(); thread_count = migrate_multifd_channels(); multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); @@ -779,6 +783,7 @@ int multifd_save_setup(Error **errp) p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); p->packet->version = cpu_to_be32(MULTIFD_VERSION); p->name = g_strdup_printf("multifdsend_%d", i); + p->tls_hostname = g_strdup(s->hostname); socket_send_channel_create(multifd_new_send_channel_async, p); } diff --git a/migration/multifd.h b/migration/multifd.h index 448a03d89a..8d6751f5ed 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -71,6 +71,8 @@ typedef struct { uint8_t id; /* channel thread name */ char *name; + /* tls hostname */ + char *tls_hostname; /* channel thread id */ QemuThread thread; /* communication channel */ From 03c7a42d0de6437923188238d4a1278ede0608ef Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Tue, 15 Sep 2020 11:04:00 +0800 Subject: [PATCH 20/26] migration/tls: extract cleanup function for common-use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit multifd channel cleanup is need if multifd handshake failed, let's extract it. Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Reviewed-by: Daniel P. Berrangé Message-Id: <1600139042-104593-5-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/multifd.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/migration/multifd.c b/migration/multifd.c index de34276c43..36d4a403a5 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -719,6 +719,23 @@ out: return NULL; } +static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, + QIOChannel *ioc, Error *err) +{ + migrate_set_error(migrate_get_current(), err); + /* Error happen, we need to tell who pay attention to me */ + qemu_sem_post(&multifd_send_state->channels_ready); + qemu_sem_post(&p->sem_sync); + /* + * Although multifd_send_thread is not created, but main migration + * thread neet to judge whether it is running, so we need to mark + * its status. + */ + p->quit = true; + object_unref(OBJECT(ioc)); + error_free(err); +} + static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) { MultiFDSendParams *p = opaque; @@ -727,25 +744,18 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) trace_multifd_new_send_channel_async(p->id); if (qio_task_propagate_error(task, &local_err)) { - migrate_set_error(migrate_get_current(), local_err); - /* Error happen, we need to tell who pay attention to me */ - qemu_sem_post(&multifd_send_state->channels_ready); - qemu_sem_post(&p->sem_sync); - /* - * Although multifd_send_thread is not created, but main migration - * thread needs to judge whether it is running, so we need to mark - * its status. - */ - p->quit = true; - object_unref(OBJECT(sioc)); - error_free(local_err); + goto cleanup; } else { p->c = QIO_CHANNEL(sioc); qio_channel_set_delay(p->c, false); p->running = true; qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, QEMU_THREAD_JOINABLE); + return; } + +cleanup: + multifd_new_send_channel_cleanup(p, sioc, local_err); } int multifd_save_setup(Error **errp) From 29647140157a4c91d4f80b8e8a633c6aabf579a6 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Tue, 15 Sep 2020 11:04:01 +0800 Subject: [PATCH 21/26] migration/tls: add support for multifd tls-handshake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar like migration main thread, we need to do handshake for each multifd thread. Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Reviewed-by: Daniel P. Berrangé Message-Id: <1600139042-104593-6-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/multifd.c | 77 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/migration/multifd.c b/migration/multifd.c index 36d4a403a5..67e39593a7 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -20,6 +20,7 @@ #include "ram.h" #include "migration.h" #include "socket.h" +#include "tls.h" #include "qemu-file.h" #include "trace.h" #include "multifd.h" @@ -719,6 +720,77 @@ out: return NULL; } +static bool multifd_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error *error); + +static void multifd_tls_outgoing_handshake(QIOTask *task, + gpointer opaque) +{ + MultiFDSendParams *p = opaque; + QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); + Error *err = NULL; + + qio_task_propagate_error(task, &err); + multifd_channel_connect(p, ioc, err); +} + +static void multifd_tls_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error **errp) +{ + MigrationState *s = migrate_get_current(); + const char *hostname = p->tls_hostname; + QIOChannelTLS *tioc; + + tioc = migration_tls_client_create(s, ioc, hostname, errp); + if (!tioc) { + return; + } + + qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); + qio_channel_tls_handshake(tioc, + multifd_tls_outgoing_handshake, + p, + NULL, + NULL); + +} + +static bool multifd_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error *error) +{ + MigrationState *s = migrate_get_current(); + + if (!error) { + if (s->parameters.tls_creds && + *s->parameters.tls_creds && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + multifd_tls_channel_connect(p, ioc, &error); + if (!error) { + /* + * tls_channel_connect will call back to this + * function after the TLS handshake, + * so we mustn't call multifd_send_thread until then + */ + return false; + } else { + return true; + } + } else { + /* update for tls qio channel */ + p->c = ioc; + qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, + QEMU_THREAD_JOINABLE); + } + return false; + } + + return true; +} + static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, QIOChannel *ioc, Error *err) { @@ -749,8 +821,9 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) p->c = QIO_CHANNEL(sioc); qio_channel_set_delay(p->c, false); p->running = true; - qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, - QEMU_THREAD_JOINABLE); + if (multifd_channel_connect(p, sioc, local_err)) { + goto cleanup; + } return; } From 894f021411885ac11c53f1e4d6e48c630ce1bc85 Mon Sep 17 00:00:00 2001 From: Chuan Zheng Date: Tue, 15 Sep 2020 11:04:02 +0800 Subject: [PATCH 22/26] migration/tls: add trace points for multifd-tls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit add trace points for multifd-tls for debug. Signed-off-by: Chuan Zheng Signed-off-by: Yan Jin Reviewed-by: Daniel P. Berrangé Message-Id: <1600139042-104593-7-git-send-email-zhengchuan@huawei.com> Signed-off-by: Dr. David Alan Gilbert --- migration/multifd.c | 10 +++++++++- migration/trace-events | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/migration/multifd.c b/migration/multifd.c index 67e39593a7..776f963436 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -731,7 +731,11 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); Error *err = NULL; - qio_task_propagate_error(task, &err); + if (qio_task_propagate_error(task, &err)) { + trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); + } else { + trace_multifd_tls_outgoing_handshake_complete(ioc); + } multifd_channel_connect(p, ioc, err); } @@ -748,6 +752,7 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p, return; } + trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); qio_channel_tls_handshake(tioc, multifd_tls_outgoing_handshake, @@ -763,6 +768,9 @@ static bool multifd_channel_connect(MultiFDSendParams *p, { MigrationState *s = migrate_get_current(); + trace_multifd_set_outgoing_channel( + ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); + if (!error) { if (s->parameters.tls_creds && *s->parameters.tls_creds && diff --git a/migration/trace-events b/migration/trace-events index 597a470b9d..338f38b3dd 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -129,6 +129,10 @@ multifd_send_sync_main_wait(uint8_t id) "channel %d" multifd_send_terminate_threads(bool error) "error %d" multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 multifd_send_thread_start(uint8_t id) "%d" +multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" +multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" +multifd_tls_outgoing_handshake_complete(void *ioc) "ioc=%p" +multifd_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname, void *err) "ioc=%p ioctype=%s hostname=%s err=%p" # migration.c await_return_path_close_on_source_close(void) "" From 0210c3b39bef08b7e1d92f28ece771c1970c5f35 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 22 Sep 2020 10:57:41 +0100 Subject: [PATCH 23/26] monitor: Use LOCK_GUARD macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the lock guard macros in monitor/misc.c - saves a lot of unlocks in error paths, and the occasional goto. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20200922095741.101911-1-dgilbert@redhat.com> Reviewed-by: Markus Armbruster Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Dr. David Alan Gilbert --- monitor/misc.c | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/monitor/misc.c b/monitor/misc.c index 262f2bd951..6e0da0cb96 100644 --- a/monitor/misc.c +++ b/monitor/misc.c @@ -141,13 +141,13 @@ char *qmp_human_monitor_command(const char *command_line, bool has_cpu_index, handle_hmp_command(&hmp, command_line); cur_mon = old_mon; - qemu_mutex_lock(&hmp.common.mon_lock); - if (qstring_get_length(hmp.common.outbuf) > 0) { - output = g_strdup(qstring_get_str(hmp.common.outbuf)); - } else { - output = g_strdup(""); + WITH_QEMU_LOCK_GUARD(&hmp.common.mon_lock) { + if (qstring_get_length(hmp.common.outbuf) > 0) { + output = g_strdup(qstring_get_str(hmp.common.outbuf)); + } else { + output = g_strdup(""); + } } - qemu_mutex_unlock(&hmp.common.mon_lock); out: monitor_data_destroy(&hmp.common); @@ -1248,7 +1248,7 @@ void qmp_getfd(const char *fdname, Error **errp) return; } - qemu_mutex_lock(&cur_mon->mon_lock); + QEMU_LOCK_GUARD(&cur_mon->mon_lock); QLIST_FOREACH(monfd, &cur_mon->fds, next) { if (strcmp(monfd->name, fdname) != 0) { continue; @@ -1256,7 +1256,6 @@ void qmp_getfd(const char *fdname, Error **errp) tmp_fd = monfd->fd; monfd->fd = fd; - qemu_mutex_unlock(&cur_mon->mon_lock); /* Make sure close() is outside critical section */ close(tmp_fd); return; @@ -1267,7 +1266,6 @@ void qmp_getfd(const char *fdname, Error **errp) monfd->fd = fd; QLIST_INSERT_HEAD(&cur_mon->fds, monfd, next); - qemu_mutex_unlock(&cur_mon->mon_lock); } void qmp_closefd(const char *fdname, Error **errp) @@ -1299,7 +1297,7 @@ int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp) { mon_fd_t *monfd; - qemu_mutex_lock(&mon->mon_lock); + QEMU_LOCK_GUARD(&mon->mon_lock); QLIST_FOREACH(monfd, &mon->fds, next) { int fd; @@ -1313,12 +1311,10 @@ int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp) QLIST_REMOVE(monfd, next); g_free(monfd->name); g_free(monfd); - qemu_mutex_unlock(&mon->mon_lock); return fd; } - qemu_mutex_unlock(&mon->mon_lock); error_setg(errp, "File descriptor named '%s' has not been found", fdname); return -1; } @@ -1350,11 +1346,10 @@ void monitor_fdsets_cleanup(void) MonFdset *mon_fdset; MonFdset *mon_fdset_next; - qemu_mutex_lock(&mon_fdsets_lock); + QEMU_LOCK_GUARD(&mon_fdsets_lock); QLIST_FOREACH_SAFE(mon_fdset, &mon_fdsets, next, mon_fdset_next) { monitor_fdset_cleanup(mon_fdset); } - qemu_mutex_unlock(&mon_fdsets_lock); } AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque, @@ -1389,7 +1384,7 @@ void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t fd, Error **errp) MonFdsetFd *mon_fdset_fd; char fd_str[60]; - qemu_mutex_lock(&mon_fdsets_lock); + QEMU_LOCK_GUARD(&mon_fdsets_lock); QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { if (mon_fdset->id != fdset_id) { continue; @@ -1409,12 +1404,10 @@ void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t fd, Error **errp) goto error; } monitor_fdset_cleanup(mon_fdset); - qemu_mutex_unlock(&mon_fdsets_lock); return; } error: - qemu_mutex_unlock(&mon_fdsets_lock); if (has_fd) { snprintf(fd_str, sizeof(fd_str), "fdset-id:%" PRId64 ", fd:%" PRId64, fdset_id, fd); @@ -1430,7 +1423,7 @@ FdsetInfoList *qmp_query_fdsets(Error **errp) MonFdsetFd *mon_fdset_fd; FdsetInfoList *fdset_list = NULL; - qemu_mutex_lock(&mon_fdsets_lock); + QEMU_LOCK_GUARD(&mon_fdsets_lock); QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { FdsetInfoList *fdset_info = g_malloc0(sizeof(*fdset_info)); FdsetFdInfoList *fdsetfd_list = NULL; @@ -1460,7 +1453,6 @@ FdsetInfoList *qmp_query_fdsets(Error **errp) fdset_info->next = fdset_list; fdset_list = fdset_info; } - qemu_mutex_unlock(&mon_fdsets_lock); return fdset_list; } @@ -1554,7 +1546,7 @@ int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags) #else MonFdset *mon_fdset; - qemu_mutex_lock(&mon_fdsets_lock); + QEMU_LOCK_GUARD(&mon_fdsets_lock); QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { MonFdsetFd *mon_fdset_fd; MonFdsetFd *mon_fdset_fd_dup; @@ -1569,7 +1561,6 @@ int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags) QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) { mon_fd_flags = fcntl(mon_fdset_fd->fd, F_GETFL); if (mon_fd_flags == -1) { - qemu_mutex_unlock(&mon_fdsets_lock); return -1; } @@ -1580,25 +1571,21 @@ int monitor_fdset_dup_fd_add(int64_t fdset_id, int flags) } if (fd == -1) { - qemu_mutex_unlock(&mon_fdsets_lock); errno = EACCES; return -1; } dup_fd = qemu_dup_flags(fd, flags); if (dup_fd == -1) { - qemu_mutex_unlock(&mon_fdsets_lock); return -1; } mon_fdset_fd_dup = g_malloc0(sizeof(*mon_fdset_fd_dup)); mon_fdset_fd_dup->fd = dup_fd; QLIST_INSERT_HEAD(&mon_fdset->dup_fds, mon_fdset_fd_dup, next); - qemu_mutex_unlock(&mon_fdsets_lock); return dup_fd; } - qemu_mutex_unlock(&mon_fdsets_lock); errno = ENOENT; return -1; #endif @@ -1609,7 +1596,7 @@ static int64_t monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove) MonFdset *mon_fdset; MonFdsetFd *mon_fdset_fd_dup; - qemu_mutex_lock(&mon_fdsets_lock); + QEMU_LOCK_GUARD(&mon_fdsets_lock); QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { QLIST_FOREACH(mon_fdset_fd_dup, &mon_fdset->dup_fds, next) { if (mon_fdset_fd_dup->fd == dup_fd) { @@ -1619,17 +1606,14 @@ static int64_t monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove) if (QLIST_EMPTY(&mon_fdset->dup_fds)) { monitor_fdset_cleanup(mon_fdset); } - goto err; + return -1; } else { - qemu_mutex_unlock(&mon_fdsets_lock); return mon_fdset->id; } } } } -err: - qemu_mutex_unlock(&mon_fdsets_lock); return -1; } From f1303afe222759105fc1787992098f5754c7e296 Mon Sep 17 00:00:00 2001 From: "Harry G. Coin" Date: Wed, 16 Sep 2020 12:22:50 +0100 Subject: [PATCH 24/26] virtiofsd: document cache=auto default The virtiofsd --help output documents the cache=auto default value but the man page does not. Fix this. Signed-off-by: Harry G. Coin Signed-off-by: Stefan Hajnoczi Message-Id: <20200916112250.760245-1-stefanha@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- docs/tools/virtiofsd.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst index 7fe6a87291..ae02938a95 100644 --- a/docs/tools/virtiofsd.rst +++ b/docs/tools/virtiofsd.rst @@ -103,6 +103,7 @@ Options forbids the FUSE client from caching to achieve best coherency at the cost of performance. ``auto`` acts similar to NFS with a 1 second metadata cache timeout. ``always`` sets a long cache lifetime at the expense of coherency. + The default is ``auto``. Examples -------- From 04d325e86f79bd61f8fd50d45ff795aca0dd3404 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 21 Sep 2020 17:32:16 -0400 Subject: [PATCH 25/26] virtiofsd: Used glib "shared" thread pool glib offers thread pools and it seems to support "exclusive" and "shared" thread pools. https://developer.gnome.org/glib/stable/glib-Thread-Pools.html#g-thread-pool-new Currently we use "exlusive" thread pools but its performance seems to be poor. I tried using "shared" thread pools and performance seems much better. I posted performance results here. https://www.redhat.com/archives/virtio-fs/2020-September/msg00080.html So lets switch to shared thread pools. We can think of making it optional once somebody can show in what cases exclusive thread pools offer better results. For now, my simple performance tests across the board see better results with shared thread pools. Signed-off-by: Vivek Goyal Message-Id: <20200921213216.GE13362@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Dr. David Alan Gilbert With seccomp fix from Miklos --- tools/virtiofsd/fuse_virtio.c | 2 +- tools/virtiofsd/passthrough_seccomp.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index 9e5537506c..d5c8e98253 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -588,7 +588,7 @@ static void *fv_queue_thread(void *opaque) struct fuse_session *se = qi->virtio_dev->se; GThreadPool *pool; - pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, TRUE, + pool = g_thread_pool_new(fv_queue_worker, qi, se->thread_pool_size, FALSE, NULL); if (!pool) { fuse_log(FUSE_LOG_ERR, "%s: g_thread_pool_new failed\n", __func__); diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c index 19fee60011..eb9af8265f 100644 --- a/tools/virtiofsd/passthrough_seccomp.c +++ b/tools/virtiofsd/passthrough_seccomp.c @@ -93,6 +93,8 @@ static const int syscall_whitelist[] = { SCMP_SYS(rt_sigaction), SCMP_SYS(rt_sigprocmask), SCMP_SYS(rt_sigreturn), + SCMP_SYS(sched_getattr), + SCMP_SYS(sched_setattr), SCMP_SYS(sendmsg), SCMP_SYS(setresgid), SCMP_SYS(setresuid), From e12a0edafeb5019aac74114b62a4703f79c5c693 Mon Sep 17 00:00:00 2001 From: Jiachen Zhang Date: Mon, 24 Aug 2020 18:59:57 +0800 Subject: [PATCH 26/26] virtiofsd: Add -o allow_direct_io|no_allow_direct_io options Due to the commit 65da4539803373ec4eec97ffc49ee90083e56efd, the O_DIRECT open flag of guest applications will be discarded by virtiofsd. While this behavior makes it consistent with the virtio-9p scheme when guest applications use direct I/O, we no longer have any chance to bypass the host page cache. Therefore, we add a flag 'allow_direct_io' to lo_data. If '-o no_allow_direct_io' option is added, or none of '-o allow_direct_io' or '-o no_allow_direct_io' is added, the 'allow_direct_io' will be set to 0, and virtiofsd discards O_DIRECT as before. If '-o allow_direct_io' is added to the starting command-line, 'allow_direct_io' will be set to 1, so that the O_DIRECT flags will be retained and host page cache can be bypassed. Signed-off-by: Jiachen Zhang Reviewed-by: Stefan Hajnoczi Message-Id: <20200824105957.61265-1-zhangjiachen.jaycee@bytedance.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/helper.c | 4 ++++ tools/virtiofsd/passthrough_ll.c | 20 ++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c index 7bc5d7dc5a..85770d63f1 100644 --- a/tools/virtiofsd/helper.c +++ b/tools/virtiofsd/helper.c @@ -178,6 +178,10 @@ void fuse_cmdline_help(void) " (0 leaves rlimit unchanged)\n" " default: min(1000000, fs.file-max - 16384)\n" " if the current rlimit is lower\n" + " -o allow_direct_io|no_allow_direct_io\n" + " retain/discard O_DIRECT flags passed down\n" + " to virtiofsd from guest applications.\n" + " default: no_allow_direct_io\n" ); } diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 784330e0e4..0b229ebd57 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -151,6 +151,7 @@ struct lo_data { int timeout_set; int readdirplus_set; int readdirplus_clear; + int allow_direct_io; struct lo_inode root; GHashTable *inodes; /* protected by lo->mutex */ struct lo_map ino_map; /* protected by lo->mutex */ @@ -179,6 +180,8 @@ static const struct fuse_opt lo_opts[] = { { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, + { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, + { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 }, FUSE_OPT_END }; static bool use_syslog = false; @@ -1516,7 +1519,8 @@ static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, fuse_reply_err(req, 0); } -static void update_open_flags(int writeback, struct fuse_file_info *fi) +static void update_open_flags(int writeback, int allow_direct_io, + struct fuse_file_info *fi) { /* * With writeback cache, kernel may send read requests even @@ -1541,10 +1545,13 @@ static void update_open_flags(int writeback, struct fuse_file_info *fi) /* * O_DIRECT in guest should not necessarily mean bypassing page - * cache on host as well. If somebody needs that behavior, it - * probably should be a configuration knob in daemon. + * cache on host as well. Therefore, we discard it by default + * ('-o no_allow_direct_io'). If somebody needs that behavior, + * the '-o allow_direct_io' option should be set. */ - fi->flags &= ~O_DIRECT; + if (!allow_direct_io) { + fi->flags &= ~O_DIRECT; + } } static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, @@ -1576,7 +1583,7 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, goto out; } - update_open_flags(lo->writeback, fi); + update_open_flags(lo->writeback, lo->allow_direct_io, fi); fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); @@ -1786,7 +1793,7 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, fi->flags); - update_open_flags(lo->writeback, fi); + update_open_flags(lo->writeback, lo->allow_direct_io, fi); sprintf(buf, "%i", lo_fd(req, ino)); fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); @@ -2823,6 +2830,7 @@ int main(int argc, char *argv[]) .debug = 0, .writeback = 0, .posix_lock = 0, + .allow_direct_io = 0, .proc_self_fd = -1, }; struct lo_map_elem *root_elem;