pc: fixes

This includes nvdimm persistence fixes queued before the release. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> -----BEGIN PGP SIGNATURE----- iQEcBAABAgAGBQJbepoTAAoJECgfDbjSjVRpLioH/3BPps8FLh4x2gZSq3B+u72O RYUA3I3TilEGyc9yf8o7e1Hf+pQAJBEmulcnKxXFVWZIJ1GVLPt4NZCMQGiPDnJL +RCT/Q64PUy09hRjddAasikrvXa4YOsRgBgJJToO7v9PSQSaU3fC7O3hNea7KcF/ C4SSqkUgxyDhCCYHHblpKxFz/wtwy4ZaCGSdozIdmKNPJ6/ye8wOQ1Mq9e1Mwp18 S6ilJub5IwB6aM2KVMmX4AFomF4u2cn153ts8fI+Dyo4/NE6P4+viDlz3BOBKdzm kmd49h6/n4Lenoo4oI1yNHSuIJJTVfvnoLu6rG7mPbQKgxNd1uN4KuUIygU5PCY= =Xcaj -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging pc: fixes This includes nvdimm persistence fixes queued before the release. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Mon 20 Aug 2018 11:38:11 BST # gpg: using RSA key 281F0DB8D28D5469 # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * remotes/mst/tags/for_upstream: migration/ram: ensure write persistence on loading all data to PMEM. migration/ram: Add check and info message to nvdimm post copy. mem/nvdimm: ensure write persistence to PMEM in label emulation hostmem-file: add the 'pmem' option configure: add libpmem support memory, exec: switch file ram allocation functions to 'flags' parameters memory, exec: Expose all memory block related flags. Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2018-08-21 10:23:53 +01:00 · 2018-08-21 10:23:53 +01:00 · 55f4e79d79
parent 90b9508e21 56eb90af39
commit 55f4e79d79
12 changed files with 235 additions and 36 deletions
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@ -12,6 +12,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
+#include "qemu/error-report.h"
 #include "sysemu/hostmem.h"
 #include "sysemu/sysemu.h"
 #include "qom/object_interfaces.h"
@ -31,9 +32,10 @@ typedef struct HostMemoryBackendFile HostMemoryBackendFile;
 struct HostMemoryBackendFile {
    HostMemoryBackend parent_obj;

-    bool discard_data;
    char *mem_path;
    uint64_t align;
+    bool discard_data;
+    bool is_pmem;
 };

 static void
@ -58,7 +60,9 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
        path = object_get_canonical_path(OBJECT(backend));
        memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
                                 path,
-                                 backend->size, fb->align, backend->share,
+                                 backend->size, fb->align,
+                                 (backend->share ? RAM_SHARED : 0) |
+                                 (fb->is_pmem ? RAM_PMEM : 0),
                                 fb->mem_path, errp);
        g_free(path);
    }
@ -130,6 +134,39 @@ static void file_memory_backend_set_align(Object *o, Visitor *v,
    error_propagate(errp, local_err);
 }

+static bool file_memory_backend_get_pmem(Object *o, Error **errp)
+{
+    return MEMORY_BACKEND_FILE(o)->is_pmem;
+}
+
+static void file_memory_backend_set_pmem(Object *o, bool value, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(o);
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+
+    if (host_memory_backend_mr_inited(backend)) {
+        error_setg(errp, "cannot change property 'pmem' of %s '%s'",
+                   object_get_typename(o),
+                   object_get_canonical_path_component(o));
+        return;
+    }
+
+#ifndef CONFIG_LIBPMEM
+    if (value) {
+        Error *local_err = NULL;
+        error_setg(&local_err,
+                   "Lack of libpmem support while setting the 'pmem=on'"
+                   " of %s '%s'. We can't ensure data persistence.",
+                   object_get_typename(o),
+                   object_get_canonical_path_component(o));
+        error_propagate(errp, local_err);
+        return;
+    }
+#endif
+
+    fb->is_pmem = value;
+}
+
 static void file_backend_unparent(Object *obj)
 {
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@ -161,6 +198,9 @@ file_backend_class_init(ObjectClass *oc, void *data)
        file_memory_backend_get_align,
        file_memory_backend_set_align,
        NULL, NULL, &error_abort);
+    object_class_property_add_bool(oc, "pmem",
+        file_memory_backend_get_pmem, file_memory_backend_set_pmem,
+        &error_abort);
 }

 static void file_backend_instance_finalize(Object *o)
--- a/29
+++ b/29
@ -476,6 +476,7 @@ vxhs=""
 libxml2=""
 docker="no"
 debug_mutex="no"
+libpmem=""

 # cross compilers defaults, can be overridden with --cross-cc-ARCH
 cross_cc_aarch64="aarch64-linux-gnu-gcc"
@ -1440,6 +1441,10 @@ for opt do
  ;;
  --disable-debug-mutex) debug_mutex=no
  ;;
+  --enable-libpmem) libpmem=yes
+  ;;
+  --disable-libpmem) libpmem=no
+  ;;
  *)
      echo "ERROR: unknown option $opt"
      echo "Try '$0 --help' for more information"
@ -1716,6 +1721,7 @@ disabled with --disable-FEATURE, default is enabled if available:
  vhost-user      vhost-user support
  capstone        capstone disassembler support
  debug-mutex     mutex debugging support
+  libpmem         libpmem support

 NOTE: The object files are built at the place where configure is launched
 EOF
@ -5593,6 +5599,24 @@ if has "docker"; then
    docker=$($python $source_path/tests/docker/docker.py probe)
 fi

+##########################################
+# check for libpmem
+
+if test "$libpmem" != "no"; then
+	if $pkg_config --exists "libpmem"; then
+		libpmem="yes"
+		libpmem_libs=$($pkg_config --libs libpmem)
+		libpmem_cflags=$($pkg_config --cflags libpmem)
+		libs_softmmu="$libs_softmmu $libpmem_libs"
+		QEMU_CFLAGS="$QEMU_CFLAGS $libpmem_cflags"
+	else
+		if test "$libpmem" = "yes" ; then
+			feature_not_found "libpmem" "Install nvml or pmdk"
+		fi
+		libpmem="no"
+	fi
+fi
+
 ##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
@ -6059,6 +6083,7 @@ echo "replication support $replication"
 echo "VxHS block device $vxhs"
 echo "capstone          $capstone"
 echo "docker            $docker"
+echo "libpmem support   $libpmem"

 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@ -6816,6 +6841,10 @@ if test "$vxhs" = "yes" ; then
  echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
 fi

+if test "$libpmem" = "yes" ; then
+  echo "CONFIG_LIBPMEM=y" >> $config_host_mak
+fi
+
 if test "$tcg_interpreter" = "yes"; then
  QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
 elif test "$ARCH" = "sparc64" ; then
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@ -173,3 +173,25 @@ There are currently two valid values for this option:
             the NVDIMMs in the event of power loss.  This implies that the
             platform also supports flushing dirty data through the memory
             controller on power loss.
+
+If the vNVDIMM backend is in host persistent memory that can be accessed in
+SNIA NVM Programming Model [1] (e.g., Intel NVDIMM), it's suggested to set
+the 'pmem' option of memory-backend-file to 'on'. When 'pmem' is 'on' and QEMU
+is built with libpmem [2] support (configured with --enable-libpmem), QEMU
+will take necessary operations to guarantee the persistence of its own writes
+to the vNVDIMM backend(e.g., in vNVDIMM label emulation and live migration).
+If 'pmem' is 'on' while there is no libpmem support, qemu will exit and report
+a "lack of libpmem support" message to ensure the persistence is available.
+For example, if we want to ensure the persistence for some backend file,
+use the QEMU command line:
+
+    -object memory-backend-file,id=nv_mem,mem-path=/XXX/yyy,size=4G,pmem=on
+
+References
+----------
+
+[1] NVM Programming Model (NPM)
+	Version 1.2
+    https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf
+[2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page:
+    http://pmem.io/pmdk/
--- a/exec.c
+++ b/exec.c
@ -87,26 +87,6 @@ AddressSpace address_space_memory;

 MemoryRegion io_mem_rom, io_mem_notdirty;
 static MemoryRegion io_mem_unassigned;
-
-/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
-#define RAM_PREALLOC   (1 << 0)
-
-/* RAM is mmap-ed with MAP_SHARED */
-#define RAM_SHARED     (1 << 1)
-
-/* Only a portion of RAM (used_length) is actually used, and migrated.
- * This used_length size can change across reboots.
- */
-#define RAM_RESIZEABLE (1 << 2)
-
-/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically
- * zero the page and wake waiting processes.
- * (Set during postcopy)
- */
-#define RAM_UF_ZEROPAGE (1 << 3)
-
-/* RAM can be migrated */
-#define RAM_MIGRATABLE (1 << 4)
 #endif

 #ifdef TARGET_PAGE_BITS_VARY
@ -2252,13 +2232,16 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)

 #ifdef __linux__
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
-                                 bool share, int fd,
+                                 uint32_t ram_flags, int fd,
                                 Error **errp)
 {
    RAMBlock *new_block;
    Error *local_err = NULL;
    int64_t file_size;

+    /* Just support these ram flags by now. */
+    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
+
    if (xen_enabled()) {
        error_setg(errp, "-mem-path not supported with Xen");
        return NULL;
@ -2294,14 +2277,14 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
    new_block->mr = mr;
    new_block->used_length = size;
    new_block->max_length = size;
-    new_block->flags = share ? RAM_SHARED : 0;
+    new_block->flags = ram_flags;
    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp);
    if (!new_block->host) {
        g_free(new_block);
        return NULL;
    }

-    ram_block_add(new_block, &local_err, share);
+    ram_block_add(new_block, &local_err, ram_flags & RAM_SHARED);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
@ -2313,7 +2296,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,


 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
-                                   bool share, const char *mem_path,
+                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp)
 {
    int fd;
@ -2325,7 +2308,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
        return NULL;
    }

-    block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp);
+    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, errp);
    if (!block) {
        if (created) {
            unlink(mem_path);
@ -4086,6 +4069,11 @@ err:
    return ret;
 }

+bool ramblock_is_pmem(RAMBlock *rb)
+{
+    return rb->flags & RAM_PMEM;
+}
+
 #endif

 void page_size_init(void)
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@ -23,6 +23,7 @@
 */

 #include "qemu/osdep.h"
+#include "qemu/pmem.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
 #include "hw/mem/nvdimm.h"
@ -164,11 +165,17 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
 {
    MemoryRegion *mr;
    PCDIMMDevice *dimm = PC_DIMM(nvdimm);
+    bool is_pmem = object_property_get_bool(OBJECT(dimm->hostmem),
+                                            "pmem", NULL);
    uint64_t backend_offset;

    nvdimm_validate_rw_label_data(nvdimm, size, offset);

-    memcpy(nvdimm->label_data + offset, buf, size);
+    if (!is_pmem) {
+        memcpy(nvdimm->label_data + offset, buf, size);
+    } else {
+        pmem_memcpy_persist(nvdimm->label_data + offset, buf, size);
+    }

    mr = host_memory_backend_get_memory(dimm->hostmem);
    backend_offset = memory_region_size(mr) - nvdimm->label_size + offset;
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@ -103,6 +103,29 @@ struct IOMMUNotifier {
 };
 typedef struct IOMMUNotifier IOMMUNotifier;

+/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
+#define RAM_PREALLOC   (1 << 0)
+
+/* RAM is mmap-ed with MAP_SHARED */
+#define RAM_SHARED     (1 << 1)
+
+/* Only a portion of RAM (used_length) is actually used, and migrated.
+ * This used_length size can change across reboots.
+ */
+#define RAM_RESIZEABLE (1 << 2)
+
+/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically
+ * zero the page and wake waiting processes.
+ * (Set during postcopy)
+ */
+#define RAM_UF_ZEROPAGE (1 << 3)
+
+/* RAM can be migrated */
+#define RAM_MIGRATABLE (1 << 4)
+
+/* RAM is a persistent kind memory */
+#define RAM_PMEM (1 << 5)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                       IOMMUNotifierFlag flags,
                                       hwaddr start, hwaddr end,
@ -611,6 +634,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
                                                       void *host),
                                       Error **errp);
 #ifdef __linux__
+
 /**
 * memory_region_init_ram_from_file:  Initialize RAM memory region with a
 *                                    mmap-ed backend.
@ -622,7 +646,10 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
 * @size: size of the region.
 * @align: alignment of the region base address; if 0, the default alignment
 *         (getpagesize()) will be used.
- * @share: %true if memory must be mmaped with the MAP_SHARED flag
+ * @ram_flags: Memory region features:
+ *             - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag
+ *             - RAM_PMEM: the memory is persistent memory
+ *             Other bits are ignored now.
 * @path: the path in which to allocate the RAM.
 * @errp: pointer to Error*, to store an error if it happens.
 *
@ -634,7 +661,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
                                      const char *name,
                                      uint64_t size,
                                      uint64_t align,
-                                      bool share,
+                                      uint32_t ram_flags,
                                      const char *path,
                                      Error **errp);

--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@ -70,13 +70,37 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
    return host_addr_offset >> TARGET_PAGE_BITS;
 }

+bool ramblock_is_pmem(RAMBlock *rb);
+
 long qemu_getrampagesize(void);
+
+/**
+ * qemu_ram_alloc_from_file,
+ * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
+ *                          file or device
+ *
+ * Parameters:
+ *  @size: the size in bytes of the ram block
+ *  @mr: the memory region where the ram block is
+ *  @ram_flags: specify the properties of the ram block, which can be one
+ *              or bit-or of following values
+ *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
+ *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
+ *              Other bits are ignored.
+ *  @mem_path or @fd: specify the backing file or device
+ *  @errp: pointer to Error*, to store an error if it happens
+ *
+ * Return:
+ *  On success, return a pointer to the ram block.
+ *  On failure, return NULL.
+ */
 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
-                                   bool share, const char *mem_path,
+                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
-                                 bool share, int fd,
+                                 uint32_t ram_flags, int fd,
                                 Error **errp);
+
 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
 RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
--- a/include/qemu/pmem.h
+++ b/include/qemu/pmem.h
@ -0,0 +1,36 @@
+/*
+ * QEMU header file for libpmem.
+ *
+ * Copyright (c) 2018 Intel Corporation.
+ *
+ * Author: Haozhong Zhang <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_PMEM_H
+#define QEMU_PMEM_H
+
+#ifdef CONFIG_LIBPMEM
+#include <libpmem.h>
+#else  /* !CONFIG_LIBPMEM */
+
+static inline void *
+pmem_memcpy_persist(void *pmemdest, const void *src, size_t len)
+{
+    /* If 'pmem' option is 'on', we should always have libpmem support,
+       or qemu will report a error and exit, never come here. */
+    g_assert_not_reached();
+    return NULL;
+}
+
+static inline void
+pmem_persist(const void *addr, size_t len)
+{
+    g_assert_not_reached();
+}
+
+#endif /* CONFIG_LIBPMEM */
+
+#endif /* !QEMU_PMEM_H */
--- a/memory.c
+++ b/memory.c
@ -1551,7 +1551,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
                                      const char *name,
                                      uint64_t size,
                                      uint64_t align,
-                                      bool share,
+                                      uint32_t ram_flags,
                                      const char *path,
                                      Error **errp)
 {
@ -1560,7 +1560,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
    mr->terminates = true;
    mr->destructor = memory_region_destructor_ram;
    mr->align = align;
-    mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp);
+    mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path, errp);
    mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
 }

@ -1576,7 +1576,9 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
    mr->ram = true;
    mr->terminates = true;
    mr->destructor = memory_region_destructor_ram;
-    mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp);
+    mr->ram_block = qemu_ram_alloc_from_fd(size, mr,
+                                           share ? RAM_SHARED : 0,
+                                           fd, errp);
    mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
 }
 #endif
--- a/migration/ram.c
+++ b/migration/ram.c
@ -33,6 +33,7 @@
 #include "qemu/bitops.h"
 #include "qemu/bitmap.h"
 #include "qemu/main-loop.h"
+#include "qemu/pmem.h"
 #include "xbzrle.h"
 #include "ram.h"
 #include "migration.h"
@ -3547,6 +3548,13 @@ static int ram_load_setup(QEMUFile *f, void *opaque)
 static int ram_load_cleanup(void *opaque)
 {
    RAMBlock *rb;
+
+    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
+        if (ramblock_is_pmem(rb)) {
+            pmem_persist(rb->host, rb->used_length);
+        }
+    }
+
    xbzrle_load_cleanup();
    compress_threads_load_cleanup();

@ -3906,6 +3914,15 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)

 static bool ram_has_postcopy(void *opaque)
 {
+    RAMBlock *rb;
+    RAMBLOCK_FOREACH_MIGRATABLE(rb) {
+        if (ramblock_is_pmem(rb)) {
+            info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
+                         "is not supported now!", rb->idstr, rb->host);
+            return false;
+        }
+    }
+
    return migrate_postcopy_ram();
 }

--- a/numa.c
+++ b/numa.c
@ -479,7 +479,7 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
    if (mem_path) {
 #ifdef __linux__
        Error *err = NULL;
-        memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, false,
+        memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, 0,
                                         mem_path, &err);
        if (err) {
            error_report_err(err);
--- a/qemu-options.hx
+++ b/qemu-options.hx
@ -4048,6 +4048,13 @@ requires an alignment different than the default one used by QEMU, eg
 the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In
 such cases, users can specify the required alignment via this option.

+The @option{pmem} option specifies whether the backing file specified
+by @option{mem-path} is in host persistent memory that can be accessed
+using the SNIA NVM programming model (e.g. Intel NVDIMM).
+If @option{pmem} is set to 'on', QEMU will take necessary operations to
+guarantee the persistence of its own writes to @option{mem-path}
+(e.g. in vNVDIMM label emulation and live migration).
+
@item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave}

 Creates a memory backend object, which can be used to back the guest RAM.