pc: fixes

This includes nvdimm persistence fixes queued before the release.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJbepoTAAoJECgfDbjSjVRpLioH/3BPps8FLh4x2gZSq3B+u72O
 RYUA3I3TilEGyc9yf8o7e1Hf+pQAJBEmulcnKxXFVWZIJ1GVLPt4NZCMQGiPDnJL
 +RCT/Q64PUy09hRjddAasikrvXa4YOsRgBgJJToO7v9PSQSaU3fC7O3hNea7KcF/
 C4SSqkUgxyDhCCYHHblpKxFz/wtwy4ZaCGSdozIdmKNPJ6/ye8wOQ1Mq9e1Mwp18
 S6ilJub5IwB6aM2KVMmX4AFomF4u2cn153ts8fI+Dyo4/NE6P4+viDlz3BOBKdzm
 kmd49h6/n4Lenoo4oI1yNHSuIJJTVfvnoLu6rG7mPbQKgxNd1uN4KuUIygU5PCY=
 =Xcaj
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

pc: fixes

This includes nvdimm persistence fixes queued before the release.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Mon 20 Aug 2018 11:38:11 BST
# gpg:                using RSA key 281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  migration/ram: ensure write persistence on loading all data to PMEM.
  migration/ram: Add check and info message to nvdimm post copy.
  mem/nvdimm: ensure write persistence to PMEM in label emulation
  hostmem-file: add the 'pmem' option
  configure: add libpmem support
  memory, exec: switch file ram allocation functions to 'flags' parameters
  memory, exec: Expose all memory block related flags.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2018-08-21 10:23:53 +01:00
commit 55f4e79d79
12 changed files with 235 additions and 36 deletions

View File

@ -12,6 +12,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "qemu/error-report.h"
#include "sysemu/hostmem.h"
#include "sysemu/sysemu.h"
#include "qom/object_interfaces.h"
@ -31,9 +32,10 @@ typedef struct HostMemoryBackendFile HostMemoryBackendFile;
struct HostMemoryBackendFile {
HostMemoryBackend parent_obj;
bool discard_data;
char *mem_path;
uint64_t align;
bool discard_data;
bool is_pmem;
};
static void
@ -58,7 +60,9 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
path = object_get_canonical_path(OBJECT(backend));
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
path,
backend->size, fb->align, backend->share,
backend->size, fb->align,
(backend->share ? RAM_SHARED : 0) |
(fb->is_pmem ? RAM_PMEM : 0),
fb->mem_path, errp);
g_free(path);
}
@ -130,6 +134,39 @@ static void file_memory_backend_set_align(Object *o, Visitor *v,
error_propagate(errp, local_err);
}
static bool file_memory_backend_get_pmem(Object *o, Error **errp)
{
return MEMORY_BACKEND_FILE(o)->is_pmem;
}
static void file_memory_backend_set_pmem(Object *o, bool value, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(o);
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
if (host_memory_backend_mr_inited(backend)) {
error_setg(errp, "cannot change property 'pmem' of %s '%s'",
object_get_typename(o),
object_get_canonical_path_component(o));
return;
}
#ifndef CONFIG_LIBPMEM
if (value) {
Error *local_err = NULL;
error_setg(&local_err,
"Lack of libpmem support while setting the 'pmem=on'"
" of %s '%s'. We can't ensure data persistence.",
object_get_typename(o),
object_get_canonical_path_component(o));
error_propagate(errp, local_err);
return;
}
#endif
fb->is_pmem = value;
}
static void file_backend_unparent(Object *obj)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@ -161,6 +198,9 @@ file_backend_class_init(ObjectClass *oc, void *data)
file_memory_backend_get_align,
file_memory_backend_set_align,
NULL, NULL, &error_abort);
object_class_property_add_bool(oc, "pmem",
file_memory_backend_get_pmem, file_memory_backend_set_pmem,
&error_abort);
}
static void file_backend_instance_finalize(Object *o)

29
configure vendored
View File

@ -476,6 +476,7 @@ vxhs=""
libxml2=""
docker="no"
debug_mutex="no"
libpmem=""
# cross compilers defaults, can be overridden with --cross-cc-ARCH
cross_cc_aarch64="aarch64-linux-gnu-gcc"
@ -1440,6 +1441,10 @@ for opt do
;;
--disable-debug-mutex) debug_mutex=no
;;
--enable-libpmem) libpmem=yes
;;
--disable-libpmem) libpmem=no
;;
*)
echo "ERROR: unknown option $opt"
echo "Try '$0 --help' for more information"
@ -1716,6 +1721,7 @@ disabled with --disable-FEATURE, default is enabled if available:
vhost-user vhost-user support
capstone capstone disassembler support
debug-mutex mutex debugging support
libpmem libpmem support
NOTE: The object files are built at the place where configure is launched
EOF
@ -5593,6 +5599,24 @@ if has "docker"; then
docker=$($python $source_path/tests/docker/docker.py probe)
fi
##########################################
# check for libpmem
if test "$libpmem" != "no"; then
if $pkg_config --exists "libpmem"; then
libpmem="yes"
libpmem_libs=$($pkg_config --libs libpmem)
libpmem_cflags=$($pkg_config --cflags libpmem)
libs_softmmu="$libs_softmmu $libpmem_libs"
QEMU_CFLAGS="$QEMU_CFLAGS $libpmem_cflags"
else
if test "$libpmem" = "yes" ; then
feature_not_found "libpmem" "Install nvml or pmdk"
fi
libpmem="no"
fi
fi
##########################################
# End of CC checks
# After here, no more $cc or $ld runs
@ -6059,6 +6083,7 @@ echo "replication support $replication"
echo "VxHS block device $vxhs"
echo "capstone $capstone"
echo "docker $docker"
echo "libpmem support $libpmem"
if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@ -6816,6 +6841,10 @@ if test "$vxhs" = "yes" ; then
echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
fi
if test "$libpmem" = "yes" ; then
echo "CONFIG_LIBPMEM=y" >> $config_host_mak
fi
if test "$tcg_interpreter" = "yes"; then
QEMU_INCLUDES="-iquote \$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
elif test "$ARCH" = "sparc64" ; then

View File

@ -173,3 +173,25 @@ There are currently two valid values for this option:
the NVDIMMs in the event of power loss. This implies that the
platform also supports flushing dirty data through the memory
controller on power loss.
If the vNVDIMM backend is in host persistent memory that can be accessed in
SNIA NVM Programming Model [1] (e.g., Intel NVDIMM), it's suggested to set
the 'pmem' option of memory-backend-file to 'on'. When 'pmem' is 'on' and QEMU
is built with libpmem [2] support (configured with --enable-libpmem), QEMU
will take necessary operations to guarantee the persistence of its own writes
to the vNVDIMM backend(e.g., in vNVDIMM label emulation and live migration).
If 'pmem' is 'on' while there is no libpmem support, qemu will exit and report
a "lack of libpmem support" message to ensure the persistence is available.
For example, if we want to ensure the persistence for some backend file,
use the QEMU command line:
-object memory-backend-file,id=nv_mem,mem-path=/XXX/yyy,size=4G,pmem=on
References
----------
[1] NVM Programming Model (NPM)
Version 1.2
https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf
[2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page:
http://pmem.io/pmdk/

38
exec.c
View File

@ -87,26 +87,6 @@ AddressSpace address_space_memory;
MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;
/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC (1 << 0)
/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED (1 << 1)
/* Only a portion of RAM (used_length) is actually used, and migrated.
* This used_length size can change across reboots.
*/
#define RAM_RESIZEABLE (1 << 2)
/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically
* zero the page and wake waiting processes.
* (Set during postcopy)
*/
#define RAM_UF_ZEROPAGE (1 << 3)
/* RAM can be migrated */
#define RAM_MIGRATABLE (1 << 4)
#endif
#ifdef TARGET_PAGE_BITS_VARY
@ -2252,13 +2232,16 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
#ifdef __linux__
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
bool share, int fd,
uint32_t ram_flags, int fd,
Error **errp)
{
RAMBlock *new_block;
Error *local_err = NULL;
int64_t file_size;
/* Just support these ram flags by now. */
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
if (xen_enabled()) {
error_setg(errp, "-mem-path not supported with Xen");
return NULL;
@ -2294,14 +2277,14 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
new_block->mr = mr;
new_block->used_length = size;
new_block->max_length = size;
new_block->flags = share ? RAM_SHARED : 0;
new_block->flags = ram_flags;
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp);
if (!new_block->host) {
g_free(new_block);
return NULL;
}
ram_block_add(new_block, &local_err, share);
ram_block_add(new_block, &local_err, ram_flags & RAM_SHARED);
if (local_err) {
g_free(new_block);
error_propagate(errp, local_err);
@ -2313,7 +2296,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
bool share, const char *mem_path,
uint32_t ram_flags, const char *mem_path,
Error **errp)
{
int fd;
@ -2325,7 +2308,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
return NULL;
}
block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp);
block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, errp);
if (!block) {
if (created) {
unlink(mem_path);
@ -4086,6 +4069,11 @@ err:
return ret;
}
bool ramblock_is_pmem(RAMBlock *rb)
{
return rb->flags & RAM_PMEM;
}
#endif
void page_size_init(void)

View File

@ -23,6 +23,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/pmem.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "hw/mem/nvdimm.h"
@ -164,11 +165,17 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
{
MemoryRegion *mr;
PCDIMMDevice *dimm = PC_DIMM(nvdimm);
bool is_pmem = object_property_get_bool(OBJECT(dimm->hostmem),
"pmem", NULL);
uint64_t backend_offset;
nvdimm_validate_rw_label_data(nvdimm, size, offset);
memcpy(nvdimm->label_data + offset, buf, size);
if (!is_pmem) {
memcpy(nvdimm->label_data + offset, buf, size);
} else {
pmem_memcpy_persist(nvdimm->label_data + offset, buf, size);
}
mr = host_memory_backend_get_memory(dimm->hostmem);
backend_offset = memory_region_size(mr) - nvdimm->label_size + offset;

View File

@ -103,6 +103,29 @@ struct IOMMUNotifier {
};
typedef struct IOMMUNotifier IOMMUNotifier;
/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC (1 << 0)
/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED (1 << 1)
/* Only a portion of RAM (used_length) is actually used, and migrated.
* This used_length size can change across reboots.
*/
#define RAM_RESIZEABLE (1 << 2)
/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically
* zero the page and wake waiting processes.
* (Set during postcopy)
*/
#define RAM_UF_ZEROPAGE (1 << 3)
/* RAM can be migrated */
#define RAM_MIGRATABLE (1 << 4)
/* RAM is a persistent kind memory */
#define RAM_PMEM (1 << 5)
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
IOMMUNotifierFlag flags,
hwaddr start, hwaddr end,
@ -611,6 +634,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
void *host),
Error **errp);
#ifdef __linux__
/**
* memory_region_init_ram_from_file: Initialize RAM memory region with a
* mmap-ed backend.
@ -622,7 +646,10 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
* @size: size of the region.
* @align: alignment of the region base address; if 0, the default alignment
* (getpagesize()) will be used.
* @share: %true if memory must be mmaped with the MAP_SHARED flag
* @ram_flags: Memory region features:
* - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag
* - RAM_PMEM: the memory is persistent memory
* Other bits are ignored now.
* @path: the path in which to allocate the RAM.
* @errp: pointer to Error*, to store an error if it happens.
*
@ -634,7 +661,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
const char *name,
uint64_t size,
uint64_t align,
bool share,
uint32_t ram_flags,
const char *path,
Error **errp);

View File

@ -70,13 +70,37 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
return host_addr_offset >> TARGET_PAGE_BITS;
}
bool ramblock_is_pmem(RAMBlock *rb);
long qemu_getrampagesize(void);
/**
* qemu_ram_alloc_from_file,
* qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing
* file or device
*
* Parameters:
* @size: the size in bytes of the ram block
* @mr: the memory region where the ram block is
* @ram_flags: specify the properties of the ram block, which can be one
* or bit-or of following values
* - RAM_SHARED: mmap the backing file or device with MAP_SHARED
* - RAM_PMEM: the backend @mem_path or @fd is persistent memory
* Other bits are ignored.
* @mem_path or @fd: specify the backing file or device
* @errp: pointer to Error*, to store an error if it happens
*
* Return:
* On success, return a pointer to the ram block.
* On failure, return NULL.
*/
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
bool share, const char *mem_path,
uint32_t ram_flags, const char *mem_path,
Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
bool share, int fd,
uint32_t ram_flags, int fd,
Error **errp);
RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,

36
include/qemu/pmem.h Normal file
View File

@ -0,0 +1,36 @@
/*
* QEMU header file for libpmem.
*
* Copyright (c) 2018 Intel Corporation.
*
* Author: Haozhong Zhang <address@hidden>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#ifndef QEMU_PMEM_H
#define QEMU_PMEM_H
#ifdef CONFIG_LIBPMEM
#include <libpmem.h>
#else /* !CONFIG_LIBPMEM */
static inline void *
pmem_memcpy_persist(void *pmemdest, const void *src, size_t len)
{
/* If 'pmem' option is 'on', we should always have libpmem support,
or qemu will report a error and exit, never come here. */
g_assert_not_reached();
return NULL;
}
static inline void
pmem_persist(const void *addr, size_t len)
{
g_assert_not_reached();
}
#endif /* CONFIG_LIBPMEM */
#endif /* !QEMU_PMEM_H */

View File

@ -1551,7 +1551,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
const char *name,
uint64_t size,
uint64_t align,
bool share,
uint32_t ram_flags,
const char *path,
Error **errp)
{
@ -1560,7 +1560,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
mr->align = align;
mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp);
mr->ram_block = qemu_ram_alloc_from_file(size, mr, ram_flags, path, errp);
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
}
@ -1576,7 +1576,9 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
mr->ram = true;
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
mr->ram_block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp);
mr->ram_block = qemu_ram_alloc_from_fd(size, mr,
share ? RAM_SHARED : 0,
fd, errp);
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
}
#endif

View File

@ -33,6 +33,7 @@
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "qemu/main-loop.h"
#include "qemu/pmem.h"
#include "xbzrle.h"
#include "ram.h"
#include "migration.h"
@ -3547,6 +3548,13 @@ static int ram_load_setup(QEMUFile *f, void *opaque)
static int ram_load_cleanup(void *opaque)
{
RAMBlock *rb;
RAMBLOCK_FOREACH_MIGRATABLE(rb) {
if (ramblock_is_pmem(rb)) {
pmem_persist(rb->host, rb->used_length);
}
}
xbzrle_load_cleanup();
compress_threads_load_cleanup();
@ -3906,6 +3914,15 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
static bool ram_has_postcopy(void *opaque)
{
RAMBlock *rb;
RAMBLOCK_FOREACH_MIGRATABLE(rb) {
if (ramblock_is_pmem(rb)) {
info_report("Block: %s, host: %p is a nvdimm memory, postcopy"
"is not supported now!", rb->idstr, rb->host);
return false;
}
}
return migrate_postcopy_ram();
}

2
numa.c
View File

@ -479,7 +479,7 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
if (mem_path) {
#ifdef __linux__
Error *err = NULL;
memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, false,
memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, 0,
mem_path, &err);
if (err) {
error_report_err(err);

View File

@ -4048,6 +4048,13 @@ requires an alignment different than the default one used by QEMU, eg
the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In
such cases, users can specify the required alignment via this option.
The @option{pmem} option specifies whether the backing file specified
by @option{mem-path} is in host persistent memory that can be accessed
using the SNIA NVM programming model (e.g. Intel NVDIMM).
If @option{pmem} is set to 'on', QEMU will take necessary operations to
guarantee the persistence of its own writes to @option{mem-path}
(e.g. in vNVDIMM label emulation and live migration).
@item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave}
Creates a memory backend object, which can be used to back the guest RAM.