From b0e5de93811077254a536c23b713b49e12efb742 Mon Sep 17 00:00:00 2001 From: Junyan He Date: Wed, 18 Jul 2018 15:47:57 +0800 Subject: [PATCH 1/7] memory, exec: Expose all memory block related flags. We need to use these flags in other files rather than just in exec.c. For example, RAM_SHARED should be used when creating a RAM block from a file. We expose them in exec/memory.h. Signed-off-by: Junyan He Reviewed-by: Stefan Hajnoczi Reviewed-by: Igor Mammedov Reviewed-by: Richard Henderson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- exec.c | 20 -------------------- include/exec/memory.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/exec.c b/exec.c index 4f5df07b6a..cc042dcefd 100644 --- a/exec.c +++ b/exec.c @@ -87,26 +87,6 @@ AddressSpace address_space_memory; MemoryRegion io_mem_rom, io_mem_notdirty; static MemoryRegion io_mem_unassigned; - -/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ -#define RAM_PREALLOC (1 << 0) - -/* RAM is mmap-ed with MAP_SHARED */ -#define RAM_SHARED (1 << 1) - -/* Only a portion of RAM (used_length) is actually used, and migrated. - * This used_length size can change across reboots. - */ -#define RAM_RESIZEABLE (1 << 2) - -/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically - * zero the page and wake waiting processes. 
- * (Set during postcopy) - */ -#define RAM_UF_ZEROPAGE (1 << 3) - -/* RAM can be migrated */ -#define RAM_MIGRATABLE (1 << 4) #endif #ifdef TARGET_PAGE_BITS_VARY diff --git a/include/exec/memory.h b/include/exec/memory.h index 448d41a752..6d0af29155 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -103,6 +103,26 @@ struct IOMMUNotifier { }; typedef struct IOMMUNotifier IOMMUNotifier; +/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ +#define RAM_PREALLOC (1 << 0) + +/* RAM is mmap-ed with MAP_SHARED */ +#define RAM_SHARED (1 << 1) + +/* Only a portion of RAM (used_length) is actually used, and migrated. + * This used_length size can change across reboots. + */ +#define RAM_RESIZEABLE (1 << 2) + +/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically + * zero the page and wake waiting processes. + * (Set during postcopy) + */ +#define RAM_UF_ZEROPAGE (1 << 3) + +/* RAM can be migrated */ +#define RAM_MIGRATABLE (1 << 4) + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, IOMMUNotifierFlag flags, hwaddr start, hwaddr end, From cbfc01710362f3de6fca3010a17b0e1c866fc181 Mon Sep 17 00:00:00 2001 From: Junyan He Date: Wed, 18 Jul 2018 15:47:58 +0800 Subject: [PATCH 2/7] memory, exec: switch file ram allocation functions to 'flags' parameters As more flag parameters besides the existing 'share' are going to be added to following functions memory_region_init_ram_from_file qemu_ram_alloc_from_fd qemu_ram_alloc_from_file let's switch them to use the 'flags' parameters so as to ease future flag additions. The existing 'share' flag is converted to the RAM_SHARED bit in ram_flags, and other flag bits are ignored by above functions right now. Signed-off-by: Junyan He Signed-off-by: Haozhong Zhang Reviewed-by: Stefan Hajnoczi Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Richard Henderson --- backends/hostmem-file.c | 3 ++- exec.c | 10 +++++----- include/exec/memory.h | 7 +++++-- include/exec/ram_addr.h | 25 +++++++++++++++++++++++-- memory.c | 8 +++++--- numa.c | 2 +- 6 files changed, 41 insertions(+), 14 deletions(-) diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index 134b08d63a..34c68bb081 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -58,7 +58,8 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) path = object_get_canonical_path(OBJECT(backend)); memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), path, - backend->size, fb->align, backend->share, + backend->size, fb->align, + backend->share ? RAM_SHARED : 0, fb->mem_path, errp); g_free(path); } diff --git a/exec.c b/exec.c index cc042dcefd..3b8f91448d 100644 --- a/exec.c +++ b/exec.c @@ -2238,7 +2238,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared) #ifdef __linux__ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, - bool share, int fd, + uint32_t ram_flags, int fd, Error **errp) { RAMBlock *new_block; @@ -2280,14 +2280,14 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, new_block->mr = mr; new_block->used_length = size; new_block->max_length = size; - new_block->flags = share ? 
RAM_SHARED : 0; + new_block->flags = ram_flags; new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp); if (!new_block->host) { g_free(new_block); return NULL; } - ram_block_add(new_block, &local_err, share); + ram_block_add(new_block, &local_err, ram_flags & RAM_SHARED); if (local_err) { g_free(new_block); error_propagate(errp, local_err); @@ -2299,7 +2299,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, - bool share, const char *mem_path, + uint32_t ram_flags, const char *mem_path, Error **errp) { int fd; @@ -2311,7 +2311,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, return NULL; } - block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp); + block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, errp); if (!block) { if (created) { unlink(mem_path); diff --git a/include/exec/memory.h b/include/exec/memory.h index 6d0af29155..30e7166dd1 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -640,6 +640,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, void *host), Error **errp); #ifdef __linux__ + /** * memory_region_init_ram_from_file: Initialize RAM memory region with a * mmap-ed backend. @@ -651,7 +652,9 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, * @size: size of the region. * @align: alignment of the region base address; if 0, the default alignment * (getpagesize()) will be used. - * @share: %true if memory must be mmaped with the MAP_SHARED flag + * @ram_flags: Memory region features: + * - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag + * Other bits are ignored now. * @path: the path in which to allocate the RAM. * @errp: pointer to Error*, to store an error if it happens. 
* @@ -663,7 +666,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, const char *name, uint64_t size, uint64_t align, - bool share, + uint32_t ram_flags, const char *path, Error **errp); diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index cf4ce06248..8a4a9bc614 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -71,12 +71,33 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, } long qemu_getrampagesize(void); + +/** + * qemu_ram_alloc_from_file, + * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing + * file or device + * + * Parameters: + * @size: the size in bytes of the ram block + * @mr: the memory region where the ram block is + * @ram_flags: specify the properties of the ram block, which can be one + * or bit-or of following values + * - RAM_SHARED: mmap the backing file or device with MAP_SHARED + * Other bits are ignored. + * @mem_path or @fd: specify the backing file or device + * @errp: pointer to Error*, to store an error if it happens + * + * Return: + * On success, return a pointer to the ram block. + * On failure, return NULL. 
+ */ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, - bool share, const char *mem_path, + uint32_t ram_flags, const char *mem_path, Error **errp); RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, - bool share, int fd, + uint32_t ram_flags, int fd, Error **errp); + RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, MemoryRegion *mr, Error **errp); RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr, diff --git a/memory.c b/memory.c index e9cd446968..d20f0a76fe 100644 --- a/memory.c +++ b/memory.c @@ -1551,7 +1551,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, const char *name, uint64_t size, uint64_t align, - bool share, + uint32_t ram_flags, const char *path, Error **errp) { @@ -1560,7 +1560,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, mr->terminates = true; mr->destructor = memory_region_destructor_ram; mr->align = align; - mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp); + mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path, errp); mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; } @@ -1576,7 +1576,9 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, mr->ram = true; mr->terminates = true; mr->destructor = memory_region_destructor_ram; - mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp); + mr->ram_block = qemu_ram_alloc_from_fd(size, mr, + share ? RAM_SHARED : 0, + fd, errp); mr->dirty_log_mask = tcg_enabled() ? 
(1 << DIRTY_MEMORY_CODE) : 0; } #endif diff --git a/numa.c b/numa.c index 5f6367b989..81542d4ebb 100644 --- a/numa.c +++ b/numa.c @@ -479,7 +479,7 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, if (mem_path) { #ifdef __linux__ Error *err = NULL; - memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, false, + memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, 0, mem_path, &err); if (err) { error_report_err(err); From 17824406fa55b303379f2e4af715c1e876c3535f Mon Sep 17 00:00:00 2001 From: Junyan He Date: Wed, 18 Jul 2018 15:47:59 +0800 Subject: [PATCH 3/7] configure: add libpmem support Add a pair of configure options --{enable,disable}-libpmem to control whether QEMU is compiled with PMDK libpmem [1]. QEMU may write to the host persistent memory (e.g. in vNVDIMM label emulation and live migration), so it must take the proper operations to ensure the persistence of its own writes. Depending on the CPU models and available instructions, the optimal operation can vary [2]. PMDK libpmem has already implemented those operations on multiple CPU models (x86 and ARM) and the logic to select the optimal ones, so QEMU can just use libpmem rather than re-implement them. Libpmem is a part of the PMDK project (formerly known as NVML). The project's home page is: http://pmem.io/pmdk/ And the project's repository is: https://github.com/pmem/pmdk/ For more information about libpmem APIs, you can refer to the comments in the source code of pmdk/src/libpmem/pmem.c, beginning at line 33. Signed-off-by: Junyan He Signed-off-by: Haozhong Zhang Reviewed-by: Stefan Hajnoczi Reviewed-by: Igor Mammedov Reviewed-by: Richard Henderson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- configure | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/configure b/configure index 2a7796ea80..1c9288b17b 100755 --- a/configure +++ b/configure @@ -475,6 +475,7 @@ vxhs="" libxml2="" docker="no" debug_mutex="no" +libpmem="" # cross compilers defaults, can be overridden with --cross-cc-ARCH cross_cc_aarch64="aarch64-linux-gnu-gcc" @@ -1435,6 +1436,10 @@ for opt do ;; --disable-debug-mutex) debug_mutex=no ;; + --enable-libpmem) libpmem=yes + ;; + --disable-libpmem) libpmem=no + ;; *) echo "ERROR: unknown option $opt" echo "Try '$0 --help' for more information" @@ -1710,6 +1715,7 @@ disabled with --disable-FEATURE, default is enabled if available: vhost-user vhost-user support capstone capstone disassembler support debug-mutex mutex debugging support + libpmem libpmem support NOTE: The object files are built at the place where configure is launched EOF @@ -5545,6 +5551,24 @@ if has "docker"; then docker=$($python $source_path/tests/docker/docker.py probe) fi +########################################## +# check for libpmem + +if test "$libpmem" != "no"; then + if $pkg_config --exists "libpmem"; then + libpmem="yes" + libpmem_libs=$($pkg_config --libs libpmem) + libpmem_cflags=$($pkg_config --cflags libpmem) + libs_softmmu="$libs_softmmu $libpmem_libs" + QEMU_CFLAGS="$QEMU_CFLAGS $libpmem_cflags" + else + if test "$libpmem" = "yes" ; then + feature_not_found "libpmem" "Install nvml or pmdk" + fi + libpmem="no" + fi +fi + ########################################## # End of CC checks # After here, no more $cc or $ld runs @@ -6010,6 +6034,7 @@ echo "replication support $replication" echo "VxHS block device $vxhs" echo "capstone $capstone" echo "docker $docker" +echo "libpmem support $libpmem" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -6763,6 +6788,10 @@ if test "$vxhs" = "yes" ; then echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak fi +if test "$libpmem" 
= "yes" ; then + echo "CONFIG_LIBPMEM=y" >> $config_host_mak +fi + if test "$tcg_interpreter" = "yes"; then QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/tci $QEMU_INCLUDES" elif test "$ARCH" = "sparc64" ; then From a4de8552b2580adf6fa4874439217b65d3bdd88b Mon Sep 17 00:00:00 2001 From: Junyan He Date: Wed, 18 Jul 2018 15:48:00 +0800 Subject: [PATCH 4/7] hostmem-file: add the 'pmem' option When QEMU emulates vNVDIMM labels and migrates vNVDIMM devices, it needs to know whether the backend storage is a real persistent memory, in order to decide whether special operations should be performed to ensure the data persistence. This boolean option 'pmem' allows users to specify whether the backend storage of memory-backend-file is a real persistent memory. If 'pmem=on', QEMU will set the flag RAM_PMEM in the RAM block of the corresponding memory region. If 'pmem' is set while lack of libpmem support, a error is generated. Signed-off-by: Junyan He Signed-off-by: Haozhong Zhang Reviewed-by: Stefan Hajnoczi Reviewed-by: Igor Mammedov Reviewed-by: Richard Henderson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- backends/hostmem-file.c | 43 +++++++++++++++++++++++++++++++++++++++-- docs/nvdimm.txt | 22 +++++++++++++++++++++ exec.c | 8 ++++++++ include/exec/memory.h | 4 ++++ include/exec/ram_addr.h | 3 +++ qemu-options.hx | 7 +++++++ 6 files changed, 85 insertions(+), 2 deletions(-) diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index 34c68bb081..2476dcb435 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -12,6 +12,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" +#include "qemu/error-report.h" #include "sysemu/hostmem.h" #include "sysemu/sysemu.h" #include "qom/object_interfaces.h" @@ -31,9 +32,10 @@ typedef struct HostMemoryBackendFile HostMemoryBackendFile; struct HostMemoryBackendFile { HostMemoryBackend parent_obj; - bool discard_data; char *mem_path; uint64_t align; + bool discard_data; + bool is_pmem; }; static void @@ -59,7 +61,8 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), path, backend->size, fb->align, - backend->share ? RAM_SHARED : 0, + (backend->share ? RAM_SHARED : 0) | + (fb->is_pmem ? 
RAM_PMEM : 0), fb->mem_path, errp); g_free(path); } @@ -131,6 +134,39 @@ static void file_memory_backend_set_align(Object *o, Visitor *v, error_propagate(errp, local_err); } +static bool file_memory_backend_get_pmem(Object *o, Error **errp) +{ + return MEMORY_BACKEND_FILE(o)->is_pmem; +} + +static void file_memory_backend_set_pmem(Object *o, bool value, Error **errp) +{ + HostMemoryBackend *backend = MEMORY_BACKEND(o); + HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); + + if (host_memory_backend_mr_inited(backend)) { + error_setg(errp, "cannot change property 'pmem' of %s '%s'", + object_get_typename(o), + object_get_canonical_path_component(o)); + return; + } + +#ifndef CONFIG_LIBPMEM + if (value) { + Error *local_err = NULL; + error_setg(&local_err, + "Lack of libpmem support while setting the 'pmem=on'" + " of %s '%s'. We can't ensure data persistence.", + object_get_typename(o), + object_get_canonical_path_component(o)); + error_propagate(errp, local_err); + return; + } +#endif + + fb->is_pmem = value; +} + static void file_backend_unparent(Object *obj) { HostMemoryBackend *backend = MEMORY_BACKEND(obj); @@ -162,6 +198,9 @@ file_backend_class_init(ObjectClass *oc, void *data) file_memory_backend_get_align, file_memory_backend_set_align, NULL, NULL, &error_abort); + object_class_property_add_bool(oc, "pmem", + file_memory_backend_get_pmem, file_memory_backend_set_pmem, + &error_abort); } static void file_backend_instance_finalize(Object *o) diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt index 24b443b655..5f158a6170 100644 --- a/docs/nvdimm.txt +++ b/docs/nvdimm.txt @@ -173,3 +173,25 @@ There are currently two valid values for this option: the NVDIMMs in the event of power loss. This implies that the platform also supports flushing dirty data through the memory controller on power loss. 
+ +If the vNVDIMM backend is in host persistent memory that can be accessed in +SNIA NVM Programming Model [1] (e.g., Intel NVDIMM), it's suggested to set +the 'pmem' option of memory-backend-file to 'on'. When 'pmem' is 'on' and QEMU +is built with libpmem [2] support (configured with --enable-libpmem), QEMU +will take necessary operations to guarantee the persistence of its own writes +to the vNVDIMM backend(e.g., in vNVDIMM label emulation and live migration). +If 'pmem' is 'on' while there is no libpmem support, qemu will exit and report +a "lack of libpmem support" message to ensure the persistence is available. +For example, if we want to ensure the persistence for some backend file, +use the QEMU command line: + + -object memory-backend-file,id=nv_mem,mem-path=/XXX/yyy,size=4G,pmem=on + +References +---------- + +[1] NVM Programming Model (NPM) + Version 1.2 + https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf +[2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page: + http://pmem.io/pmdk/ diff --git a/exec.c b/exec.c index 3b8f91448d..fa3fbc6646 100644 --- a/exec.c +++ b/exec.c @@ -2245,6 +2245,9 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, Error *local_err = NULL; int64_t file_size; + /* Just support these ram flags by now. 
*/ + assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0); + if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); return NULL; @@ -4072,6 +4075,11 @@ err: return ret; } +bool ramblock_is_pmem(RAMBlock *rb) +{ + return rb->flags & RAM_PMEM; +} + #endif void page_size_init(void) diff --git a/include/exec/memory.h b/include/exec/memory.h index 30e7166dd1..cd62029a7d 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -123,6 +123,9 @@ typedef struct IOMMUNotifier IOMMUNotifier; /* RAM can be migrated */ #define RAM_MIGRATABLE (1 << 4) +/* RAM is a persistent kind memory */ +#define RAM_PMEM (1 << 5) + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, IOMMUNotifierFlag flags, hwaddr start, hwaddr end, @@ -654,6 +657,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, * (getpagesize()) will be used. * @ram_flags: Memory region features: * - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag + * - RAM_PMEM: the memory is persistent memory * Other bits are ignored now. * @path: the path in which to allocate the RAM. * @errp: pointer to Error*, to store an error if it happens. diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 8a4a9bc614..3abb639056 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -70,6 +70,8 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr, return host_addr_offset >> TARGET_PAGE_BITS; } +bool ramblock_is_pmem(RAMBlock *rb); + long qemu_getrampagesize(void); /** @@ -83,6 +85,7 @@ long qemu_getrampagesize(void); * @ram_flags: specify the properties of the ram block, which can be one * or bit-or of following values * - RAM_SHARED: mmap the backing file or device with MAP_SHARED + * - RAM_PMEM: the backend @mem_path or @fd is persistent memory * Other bits are ignored. 
* @mem_path or @fd: specify the backing file or device * @errp: pointer to Error*, to store an error if it happens diff --git a/qemu-options.hx b/qemu-options.hx index b1bf0f485f..9b920f294f 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4070,6 +4070,13 @@ requires an alignment different than the default one used by QEMU, eg the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In such cases, users can specify the required alignment via this option. +The @option{pmem} option specifies whether the backing file specified +by @option{mem-path} is in host persistent memory that can be accessed +using the SNIA NVM programming model (e.g. Intel NVDIMM). +If @option{pmem} is set to 'on', QEMU will take necessary operations to +guarantee the persistence of its own writes to @option{mem-path} +(e.g. in vNVDIMM label emulation and live migration). + @item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave} Creates a memory backend object, which can be used to back the guest RAM. From faf8a13d80de98b43342a7ec9878b4fd76b18327 Mon Sep 17 00:00:00 2001 From: Junyan He Date: Wed, 18 Jul 2018 15:48:01 +0800 Subject: [PATCH 5/7] mem/nvdimm: ensure write persistence to PMEM in label emulation Guest writes to vNVDIMM labels are intercepted and performed on the backend by QEMU. When the backend is real persistent memory, QEMU needs to take proper operations to ensure the persistence of its writes to that memory. Otherwise, a host power failure may result in the loss of guest label configurations. Signed-off-by: Haozhong Zhang Reviewed-by: Stefan Hajnoczi Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Richard Henderson --- hw/mem/nvdimm.c | 9 ++++++++- include/qemu/pmem.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 include/qemu/pmem.h diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 021d1c3997..1c6674c4ed 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -23,6 +23,7 @@ */ #include "qemu/osdep.h" +#include "qemu/pmem.h" #include "qapi/error.h" #include "qapi/visitor.h" #include "hw/mem/nvdimm.h" @@ -164,11 +165,17 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf, { MemoryRegion *mr; PCDIMMDevice *dimm = PC_DIMM(nvdimm); + bool is_pmem = object_property_get_bool(OBJECT(dimm->hostmem), + "pmem", NULL); uint64_t backend_offset; nvdimm_validate_rw_label_data(nvdimm, size, offset); - memcpy(nvdimm->label_data + offset, buf, size); + if (!is_pmem) { + memcpy(nvdimm->label_data + offset, buf, size); + } else { + pmem_memcpy_persist(nvdimm->label_data + offset, buf, size); + } mr = host_memory_backend_get_memory(dimm->hostmem); backend_offset = memory_region_size(mr) - nvdimm->label_size + offset; diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h new file mode 100644 index 0000000000..ebdb070210 --- /dev/null +++ b/include/qemu/pmem.h @@ -0,0 +1,30 @@ +/* + * QEMU header file for libpmem. + * + * Copyright (c) 2018 Intel Corporation. + * + * Author: Haozhong Zhang + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef QEMU_PMEM_H +#define QEMU_PMEM_H + +#ifdef CONFIG_LIBPMEM +#include +#else /* !CONFIG_LIBPMEM */ + +static inline void * +pmem_memcpy_persist(void *pmemdest, const void *src, size_t len) +{ + /* If 'pmem' option is 'on', we should always have libpmem support, + or qemu will report a error and exit, never come here. 
*/ + g_assert_not_reached(); + return NULL; +} + +#endif /* CONFIG_LIBPMEM */ + +#endif /* !QEMU_PMEM_H */ From 469dd51bc664979f159d47885997d482991394b8 Mon Sep 17 00:00:00 2001 From: Junyan He Date: Wed, 18 Jul 2018 15:48:02 +0800 Subject: [PATCH 6/7] migration/ram: Add check and info message to nvdimm post copy. The nvdimm kind of memory does not support postcopy yet. We disable postcopy if we have nvdimm memory and print an informational message for the user. Signed-off-by: Junyan He Reviewed-by: Stefan Hajnoczi Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- migration/ram.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/migration/ram.c b/migration/ram.c index 24dea2730c..5beefae7f5 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3906,6 +3906,15 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) static bool ram_has_postcopy(void *opaque) { + RAMBlock *rb; + RAMBLOCK_FOREACH_MIGRATABLE(rb) { + if (ramblock_is_pmem(rb)) { + info_report("Block: %s, host: %p is a nvdimm memory, postcopy" + "is not supported now!", rb->idstr, rb->host); + return false; + } + } + return migrate_postcopy_ram(); } From 56eb90af39abf66c0e80588a9f50c31e7df7320b Mon Sep 17 00:00:00 2001 From: Junyan He Date: Wed, 18 Jul 2018 15:48:03 +0800 Subject: [PATCH 7/7] migration/ram: ensure write persistence on loading all data to PMEM. Because we need to make sure the data in pmem-kind memory is synced after migration, we call pmem_persist() when the migration finishes. This makes sure the data in pmem is safe and will not be lost if power is off. Signed-off-by: Junyan He Reviewed-by: Stefan Hajnoczi Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- include/qemu/pmem.h | 6 ++++++ migration/ram.c | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h index ebdb070210..dfb6d0da62 100644 --- a/include/qemu/pmem.h +++ b/include/qemu/pmem.h @@ -25,6 +25,12 @@ pmem_memcpy_persist(void *pmemdest, const void *src, size_t len) return NULL; } +static inline void +pmem_persist(const void *addr, size_t len) +{ + g_assert_not_reached(); +} + #endif /* CONFIG_LIBPMEM */ #endif /* !QEMU_PMEM_H */ diff --git a/migration/ram.c b/migration/ram.c index 5beefae7f5..fa79d0a5b9 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -33,6 +33,7 @@ #include "qemu/bitops.h" #include "qemu/bitmap.h" #include "qemu/main-loop.h" +#include "qemu/pmem.h" #include "xbzrle.h" #include "ram.h" #include "migration.h" @@ -3547,6 +3548,13 @@ static int ram_load_setup(QEMUFile *f, void *opaque) static int ram_load_cleanup(void *opaque) { RAMBlock *rb; + + RAMBLOCK_FOREACH_MIGRATABLE(rb) { + if (ramblock_is_pmem(rb)) { + pmem_persist(rb->host, rb->used_length); + } + } + xbzrle_load_cleanup(); compress_threads_load_cleanup();