machine queue, 2018-01-19

-----BEGIN PGP SIGNATURE-----
 
 iQIcBAABCAAGBQJaYh0bAAoJECgHk2+YTcWmnvIP/1N5GwnZxP94YQOZu3SCVtk7
 J3ShPAfC0DuR+3rw1Lq80k53AxJJJrxXDHIs/4PNA0K1qE5dKytDmICkc8Cfup28
 DctYwnwVEiWvjYzTRDiDskYftW+eelwGI83kjDYA4m4R257Wpw0ko2a+Sxwc0Py3
 hvFvcatMUDhzQQS/gOWJs8kUayvyA7ZXBiGqrk+xRYMIRRQEaxQeq5h/UyB1NOz/
 2tEF0PD5CU/DdQVmbE+qb0vEbdE18n+phbYwZQP8yDcApB1SnQy8GXqqL6r6q/m3
 dErgP1AJx/Fg+yUOpNu4gVgLcVvCek2o+NB5yy1Sj2xdU/oMzu4HNiwChb8a3Guo
 MbY2tGadsl4LU4UwLxGHCUwf6igCVgrpUh3k8THc8WolpHZTgcLxbTnR2WlK4xDq
 bPu/7tnVcwK2R2NT5ApykkiO3OVbl99qK1WA9xoOL+KvOawOIT8F0rQSFgayy6RW
 6YH+gL5UDjcxYjnz+Ux+Ci+HzxMH92ffYU3bP5jNvHFS2eavLI7kSkx8kEUaDq2K
 q4USIhD9XgX3wSdrmDaKlNkBrUIDOtO7RtcsQjH625eZcc3SaiHTN/yXjMFqEWX+
 uwf1QnQRKoOLVyXLsgMYeM0BLnWet1z/LqnM2/6zyJ/RFAynDGn/592edqd21Bhz
 aZOF0mPRAkmre1kAIOhS
 =r5q8
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/ehabkost/tags/machine-next-pull-request' into staging

machine queue, 2018-01-19

# gpg: Signature made Fri 19 Jan 2018 16:30:19 GMT
# gpg:                using RSA key 0x2807936F984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/machine-next-pull-request:
  fw_cfg: fix memory corruption when all fw_cfg slots are used
  possible_cpus: add CPUArchId::type field
  nvdimm: add 'unarmed' option
  nvdimm: add a macro for property "label-size"
  hostmem-file: add "align" option
  scripts: Remove fixed entries from the device-crash-test
  qdev: Check for the availability of a hotplug controller before adding a device
  qdev_monitor: Simplify error handling in qdev_device_add()
  q35: Allow only supported dynamic sysbus devices
  xen: Add only xen-sysdev to dynamic sysbus device list
  spapr: Allow only supported dynamic sysbus devices
  ppc: e500: Allow only supported dynamic sysbus devices
  hw/arm/virt: Allow only supported dynamic sysbus devices
  machine: Replace has_dynamic_sysbus with list of allowed devices
  numa: fix missing '-numa cpu' in '-help' output
  qemu-options: document memory-backend-ram
  qemu-options: document missing memory-backend-file options
  memfd: remove needless include
  memfd: split qemu_memfd_alloc()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2018-01-19 16:35:25 +00:00
commit b384cd95eb
27 changed files with 333 additions and 101 deletions

View File

@ -34,6 +34,7 @@ struct HostMemoryBackendFile {
bool share;
bool discard_data;
char *mem_path;
uint64_t align;
};
static void
@ -58,7 +59,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
path = object_get_canonical_path(OBJECT(backend));
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
path,
backend->size, fb->share,
backend->size, fb->align, fb->share,
fb->mem_path, errp);
g_free(path);
}
@ -115,6 +116,40 @@ static void file_memory_backend_set_discard_data(Object *o, bool value,
MEMORY_BACKEND_FILE(o)->discard_data = value;
}
/*
 * Getter for the "align" property: passes the alignment currently stored
 * in the file backend to the visitor as a size value. Errors raised by
 * the visitor are reported through @errp.
 */
static void file_memory_backend_get_align(Object *o, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    /* Visit a local copy so the visitor never writes into the backend. */
    uint64_t align = MEMORY_BACKEND_FILE(o)->align;

    visit_type_size(v, name, &align, errp);
}
/*
 * Setter for the "align" property.
 *
 * The update is refused with "cannot change property value" once
 * host_memory_backend_mr_inited() reports true for the backend.
 * Otherwise the size read from the visitor is stored in the backend's
 * align field. Any error is handed back to the caller through @errp.
 */
static void file_memory_backend_set_align(Object *o, Visitor *v,
                                          const char *name, void *opaque,
                                          Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(o);
    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
    Error *err = NULL;
    uint64_t align;

    if (host_memory_backend_mr_inited(backend)) {
        error_setg(&err, "cannot change property value");
    } else {
        visit_type_size(v, name, &align, &err);
        if (!err) {
            /* Only commit the value when the visitor succeeded. */
            fb->align = align;
        }
    }

    error_propagate(errp, err);
}
static void file_backend_unparent(Object *obj)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
@ -145,6 +180,10 @@ file_backend_class_init(ObjectClass *oc, void *data)
object_class_property_add_str(oc, "mem-path",
get_mem_path, set_mem_path,
&error_abort);
object_class_property_add(oc, "align", "int",
file_memory_backend_get_align,
file_memory_backend_set_align,
NULL, NULL, &error_abort);
}
static void file_backend_instance_finalize(Object *o)

View File

@ -122,3 +122,34 @@ Note:
M >= size of RAM devices +
size of statically plugged vNVDIMM devices +
size of hotplugged vNVDIMM devices
Alignment
---------
QEMU uses mmap(2) to map vNVDIMM backends and aligns the mapping
address to the page size (getpagesize(2)) by default. However, some
types of backends may require an alignment different from the page
size. In that case, QEMU v2.12.0 and later provide the 'align' option
of memory-backend-file to allow users to specify the proper alignment.
For example, device DAX requires 2 MB alignment, so we can use the
following QEMU command line options to use it (/dev/dax0.0) as the
backend of a vNVDIMM:
-object memory-backend-file,id=mem1,share=on,mem-path=/dev/dax0.0,size=4G,align=2M
-device nvdimm,id=nvdimm1,memdev=mem1
Guest Data Persistence
----------------------
Though QEMU supports multiple types of vNVDIMM backends on Linux,
currently the only one that can guarantee guest write persistence
is device DAX on a real NVDIMM device (e.g., /dev/dax0.0), for
which guest accesses do not involve any host-side kernel cache.
When using other types of backends, it's suggested to set the 'unarmed'
option of '-device nvdimm' to 'on', which sets the unarmed flag of the
guest NVDIMM region mapping structure. This unarmed flag indicates to
guest software that this vNVDIMM device contains a region that cannot
accept persistent writes. As a result, for example, the guest Linux
NVDIMM driver marks such a vNVDIMM device as read-only.

8
exec.c
View File

@ -1612,7 +1612,13 @@ static void *file_ram_alloc(RAMBlock *block,
void *area;
block->page_size = qemu_fd_getpagesize(fd);
block->mr->align = block->page_size;
if (block->mr->align % block->page_size) {
error_setg(errp, "alignment 0x%" PRIx64
" must be multiples of page size 0x%zx",
block->mr->align, block->page_size);
return NULL;
}
block->mr->align = MAX(block->page_size, block->mr->align);
#if defined(__s390x__)
if (kvm_enabled()) {
block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);

View File

@ -138,6 +138,8 @@ struct NvdimmNfitMemDev {
} QEMU_PACKED;
typedef struct NvdimmNfitMemDev NvdimmNfitMemDev;
#define ACPI_NFIT_MEM_NOT_ARMED (1 << 3)
/*
* NVDIMM Control Region Structure
*
@ -284,6 +286,7 @@ static void
nvdimm_build_structure_memdev(GArray *structures, DeviceState *dev)
{
NvdimmNfitMemDev *nfit_memdev;
NVDIMMDevice *nvdimm = NVDIMM(OBJECT(dev));
uint64_t size = object_property_get_uint(OBJECT(dev), PC_DIMM_SIZE_PROP,
NULL);
int slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP,
@ -312,6 +315,10 @@ nvdimm_build_structure_memdev(GArray *structures, DeviceState *dev)
/* Only one interleave for PMEM. */
nfit_memdev->interleave_ways = cpu_to_le16(1);
if (nvdimm->unarmed) {
nfit_memdev->flags |= cpu_to_le16(ACPI_NFIT_MEM_NOT_ARMED);
}
}
/*

View File

@ -34,6 +34,8 @@
#include "hw/arm/arm.h"
#include "hw/arm/primecell.h"
#include "hw/arm/virt.h"
#include "hw/vfio/vfio-calxeda-xgmac.h"
#include "hw/vfio/vfio-amd-xgbe.h"
#include "hw/devices.h"
#include "net/net.h"
#include "sysemu/block-backend.h"
@ -1357,7 +1359,7 @@ static void machvirt_init(MachineState *machine)
break;
}
cpuobj = object_new(machine->cpu_type);
cpuobj = object_new(possible_cpus->cpus[n].type);
object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id,
"mp-affinity", NULL);
@ -1573,6 +1575,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
sizeof(CPUArchId) * max_cpus);
ms->possible_cpus->len = max_cpus;
for (n = 0; n < ms->possible_cpus->len; n++) {
ms->possible_cpus->cpus[n].type = ms->cpu_type;
ms->possible_cpus->cpus[n].arch_id =
virt_cpu_mp_affinity(vms, n);
ms->possible_cpus->cpus[n].props.has_thread_id = true;
@ -1591,7 +1594,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
* configuration of the particular instance.
*/
mc->max_cpus = 255;
mc->has_dynamic_sysbus = true;
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_CALXEDA_XGMAC);
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
mc->block_default_type = IF_VIRTIO;
mc->no_cdrom = 1;
mc->pci_allow_0_address = true;

View File

@ -334,46 +334,61 @@ static bool machine_get_enforce_config_section(Object *obj, Error **errp)
return ms->enforce_config_section;
}
static void error_on_sysbus_device(SysBusDevice *sbdev, void *opaque)
void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type)
{
error_report("Option '-device %s' cannot be handled by this machine",
object_class_get_name(object_get_class(OBJECT(sbdev))));
exit(1);
strList *item = g_new0(strList, 1);
item->value = g_strdup(type);
item->next = mc->allowed_dynamic_sysbus_devices;
mc->allowed_dynamic_sysbus_devices = item;
}
/*
 * Callback for foreach_dynamic_sysbus_device(): checks @sbdev against the
 * machine class's allowed_dynamic_sysbus_devices list.
 *
 * @opaque is the MachineState whose class supplies the list. If @sbdev
 * cannot be cast to any listed type, an error is reported and the
 * process exits with status 1.
 */
static void validate_sysbus_device(SysBusDevice *sbdev, void *opaque)
{
    MachineState *machine = opaque;
    MachineClass *mc = MACHINE_GET_CLASS(machine);
    strList *entry;

    for (entry = mc->allowed_dynamic_sysbus_devices;
         entry;
         entry = entry->next) {
        if (object_dynamic_cast(OBJECT(sbdev), entry->value)) {
            /* First match is enough: the device is allowed. */
            return;
        }
    }

    error_report("Option '-device %s' cannot be handled by this machine",
                 object_class_get_name(object_get_class(OBJECT(sbdev))));
    exit(1);
}
static void machine_init_notify(Notifier *notifier, void *data)
{
Object *machine = qdev_get_machine();
ObjectClass *oc = object_get_class(machine);
MachineClass *mc = MACHINE_CLASS(oc);
if (mc->has_dynamic_sysbus) {
/* Our machine can handle dynamic sysbus devices, we're all good */
return;
}
MachineState *machine = MACHINE(qdev_get_machine());
/*
* Loop through all dynamically created devices and check whether there
* are sysbus devices among them. If there are, error out.
* Loop through all dynamically created sysbus devices and check if they are
* all allowed. If a device is not allowed, error out.
*/
foreach_dynamic_sysbus_device(error_on_sysbus_device, NULL);
foreach_dynamic_sysbus_device(validate_sysbus_device, machine);
}
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine)
{
int i;
Object *cpu;
HotpluggableCPUList *head = NULL;
const char *cpu_type;
MachineClass *mc = MACHINE_GET_CLASS(machine);
/* force board to initialize possible_cpus if it hasn't been done yet */
mc->possible_cpu_arch_ids(machine);
cpu = machine->possible_cpus->cpus[0].cpu;
assert(cpu); /* Boot cpu is always present */
cpu_type = object_get_typename(cpu);
for (i = 0; i < machine->possible_cpus->len; i++) {
Object *cpu;
HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1);
HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1);
cpu_item->type = g_strdup(cpu_type);
cpu_item->type = g_strdup(machine->possible_cpus->cpus[i].type);
cpu_item->vcpus_count = machine->possible_cpus->cpus[i].vcpus_count;
cpu_item->props = g_memdup(&machine->possible_cpus->cpus[i].props,
sizeof(*cpu_item->props));

View File

@ -253,19 +253,31 @@ void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
dev->alias_required_for_version = required_for_version;
}
/*
 * Ask the machine for a hotplug handler for @dev.
 *
 * Returns the result of the machine class's get_hotplug_handler() hook,
 * or NULL when the object returned by qdev_get_machine() is not a
 * TYPE_MACHINE instance or the class does not provide the hook.
 */
HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev)
{
    Object *m_obj = qdev_get_machine();
    MachineState *machine;
    MachineClass *mc;

    if (!object_dynamic_cast(m_obj, TYPE_MACHINE)) {
        return NULL;
    }

    machine = MACHINE(m_obj);
    mc = MACHINE_GET_CLASS(machine);

    return mc->get_hotplug_handler ? mc->get_hotplug_handler(machine, dev)
                                   : NULL;
}
HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev)
{
HotplugHandler *hotplug_ctrl = NULL;
HotplugHandler *hotplug_ctrl;
if (dev->parent_bus && dev->parent_bus->hotplug_handler) {
hotplug_ctrl = dev->parent_bus->hotplug_handler;
} else if (object_dynamic_cast(qdev_get_machine(), TYPE_MACHINE)) {
MachineState *machine = MACHINE(qdev_get_machine());
MachineClass *mc = MACHINE_GET_CLASS(machine);
if (mc->get_hotplug_handler) {
hotplug_ctrl = mc->get_hotplug_handler(machine, dev);
}
} else {
hotplug_ctrl = qdev_get_machine_hotplug_handler(dev);
}
return hotplug_ctrl;
}

View File

@ -1148,7 +1148,8 @@ void pc_cpus_init(PCMachineState *pcms)
pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
possible_cpus = mc->possible_cpu_arch_ids(ms);
for (i = 0; i < smp_cpus; i++) {
pc_new_cpu(ms->cpu_type, possible_cpus->cpus[i].arch_id, &error_fatal);
pc_new_cpu(possible_cpus->cpus[i].type, possible_cpus->cpus[i].arch_id,
&error_fatal);
}
}
@ -2307,6 +2308,7 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms)
for (i = 0; i < ms->possible_cpus->len; i++) {
X86CPUTopoInfo topo;
ms->possible_cpus->cpus[i].type = ms->cpu_type;
ms->possible_cpus->cpus[i].vcpus_count = 1;
ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(i);
x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id,

View File

@ -42,6 +42,8 @@
#include "exec/address-spaces.h"
#include "hw/i386/pc.h"
#include "hw/i386/ich9.h"
#include "hw/i386/amd_iommu.h"
#include "hw/i386/intel_iommu.h"
#include "hw/smbios/smbios.h"
#include "hw/ide/pci.h"
#include "hw/ide/ahci.h"
@ -299,7 +301,8 @@ static void pc_q35_machine_options(MachineClass *m)
m->default_machine_opts = "firmware=bios-256k.bin";
m->default_display = "std";
m->no_floppy = 1;
m->has_dynamic_sysbus = true;
machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE);
machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
m->max_cpus = 288;
}

View File

@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "qapi-visit.h"
#include "hw/mem/nvdimm.h"
static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name,
@ -64,11 +65,36 @@ out:
error_propagate(errp, local_err);
}
/* Getter for the "unarmed" property: returns the device's unarmed flag. */
static bool nvdimm_get_unarmed(Object *obj, Error **errp)
{
    return NVDIMM(obj)->unarmed;
}
/*
 * Setter for the "unarmed" property.
 *
 * The flag may only be changed while memory_region_size() of the
 * device's nvdimm_mr is zero; otherwise "cannot change property value"
 * is reported through @errp and the flag is left untouched.
 * NOTE(review): a non-zero size presumably means the device has already
 * been realized -- confirm against the realize path.
 */
static void nvdimm_set_unarmed(Object *obj, bool value, Error **errp)
{
    NVDIMMDevice *nvdimm = NVDIMM(obj);
    Error *err = NULL;

    if (memory_region_size(&nvdimm->nvdimm_mr) == 0) {
        nvdimm->unarmed = value;
    } else {
        error_setg(&err, "cannot change property value");
    }

    error_propagate(errp, err);
}
static void nvdimm_init(Object *obj)
{
object_property_add(obj, "label-size", "int",
object_property_add(obj, NVDIMM_LABLE_SIZE_PROP, "int",
nvdimm_get_label_size, nvdimm_set_label_size, NULL,
NULL, NULL);
object_property_add_bool(obj, NVDIMM_UNARMED_PROP,
nvdimm_get_unarmed, nvdimm_set_unarmed, NULL);
}
static MemoryRegion *nvdimm_get_memory_region(PCDIMMDevice *dimm, Error **errp)

View File

@ -784,7 +784,7 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
* index and "i - 1" is the one being copied from, thus the
* unusual start and end in the for statement.
*/
for (i = count + 1; i > index; i--) {
for (i = count; i > index; i--) {
s->files->f[i] = s->files->f[i - 1];
s->files->f[i].select = cpu_to_be16(FW_CFG_FILE_FIRST + i);
s->entries[0][FW_CFG_FILE_FIRST + i] =
@ -833,7 +833,6 @@ void *fw_cfg_modify_file(FWCfgState *s, const char *filename,
assert(s->files);
index = be32_to_cpu(s->files->count);
assert(index < fw_cfg_file_slots(s));
for (i = 0; i < index; i++) {
if (strcmp(filename, s->files->f[i].name) == 0) {
@ -843,6 +842,9 @@ void *fw_cfg_modify_file(FWCfgState *s, const char *filename,
return ptr;
}
}
assert(index < fw_cfg_file_slots(s));
/* add new one */
fw_cfg_add_file_callback(s, filename, NULL, NULL, NULL, data, len, true);
return NULL;

View File

@ -12,9 +12,11 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "e500.h"
#include "hw/net/fsl_etsec/etsec.h"
#include "hw/boards.h"
#include "sysemu/device_tree.h"
#include "sysemu/kvm.h"
#include "hw/sysbus.h"
#include "hw/pci/pci.h"
#include "hw/ppc/openpic.h"
#include "kvm_ppc.h"
@ -63,7 +65,7 @@ static void e500plat_machine_init(MachineClass *mc)
mc->desc = "generic paravirt e500 platform";
mc->init = e500plat_init;
mc->max_cpus = 32;
mc->has_dynamic_sysbus = true;
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ETSEC_COMMON);
mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("e500v2_v30");
}

View File

@ -2226,11 +2226,6 @@ static void spapr_init_cpus(sPAPRMachineState *spapr)
int boot_cores_nr = smp_cpus / smp_threads;
int i;
if (!type) {
error_report("Unable to find sPAPR CPU Core definition");
exit(1);
}
possible_cpus = mc->possible_cpu_arch_ids(machine);
if (mc->has_hotpluggable_cpus) {
if (smp_cpus % smp_threads) {
@ -3545,6 +3540,7 @@ static int64_t spapr_get_default_cpu_node_id(const MachineState *ms, int idx)
static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
{
int i;
const char *core_type;
int spapr_max_cores = max_cpus / smp_threads;
MachineClass *mc = MACHINE_GET_CLASS(machine);
@ -3556,12 +3552,19 @@ static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
return machine->possible_cpus;
}
core_type = spapr_get_cpu_core_type(machine->cpu_type);
if (!core_type) {
error_report("Unable to find sPAPR CPU Core definition");
exit(1);
}
machine->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
sizeof(CPUArchId) * spapr_max_cores);
machine->possible_cpus->len = spapr_max_cores;
for (i = 0; i < machine->possible_cpus->len; i++) {
int core_id = i * smp_threads;
machine->possible_cpus->cpus[i].type = core_type;
machine->possible_cpus->cpus[i].vcpus_count = smp_threads;
machine->possible_cpus->cpus[i].arch_id = core_id;
machine->possible_cpus->cpus[i].props.has_core_id = true;
@ -3843,7 +3846,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
mc->default_boot_order = "";
mc->default_ram_size = 512 * M_BYTE;
mc->kvm_type = spapr_kvm_type;
mc->has_dynamic_sysbus = true;
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_SPAPR_PCI_HOST_BRIDGE);
mc->pci_allow_0_address = true;
mc->get_hotplug_handler = spapr_get_hotplug_handler;
hc->pre_plug = spapr_machine_device_pre_plug;

View File

@ -414,6 +414,7 @@ static const CPUArchIdList *s390_possible_cpu_arch_ids(MachineState *ms)
sizeof(CPUArchId) * max_cpus);
ms->possible_cpus->len = max_cpus;
for (i = 0; i < ms->possible_cpus->len; i++) {
ms->possible_cpus->cpus[i].type = ms->cpu_type;
ms->possible_cpus->cpus[i].vcpus_count = 1;
ms->possible_cpus->cpus[i].arch_id = i;
ms->possible_cpus->cpus[i].props.has_core_id = true;

View File

@ -564,7 +564,7 @@ static void xen_set_dynamic_sysbus(void)
ObjectClass *oc = object_get_class(machine);
MachineClass *mc = MACHINE_CLASS(oc);
mc->has_dynamic_sysbus = true;
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_XENSYSDEV);
}
int xen_be_register(const char *type, struct XenDevOps *ops)

View File

@ -465,6 +465,8 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
* @name: Region name, becomes part of RAMBlock name used in migration stream
* must be unique within any device
* @size: size of the region.
* @align: alignment of the region base address; if 0, the default alignment
* (getpagesize()) will be used.
* @share: %true if memory must be mmaped with the MAP_SHARED flag
* @path: the path in which to allocate the RAM.
* @errp: pointer to Error*, to store an error if it happens.
@ -476,6 +478,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
struct Object *owner,
const char *name,
uint64_t size,
uint64_t align,
bool share,
const char *path,
Error **errp);

View File

@ -76,10 +76,14 @@ void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props,
Error **errp);
void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type);
/**
* CPUArchId:
* @arch_id - architecture-dependent CPU ID of present or possible CPU
* @cpu - pointer to corresponding CPU object if it's present or NULL otherwise
* @type - QOM class name of possible @cpu object
* @props - CPU object properties, initialized by board
* @vcpus_count - number of threads provided by @cpu object
*/
@ -88,6 +92,7 @@ typedef struct {
int64_t vcpus_count;
CpuInstanceProperties props;
Object *cpu;
const char *type;
} CPUArchId;
/**
@ -179,7 +184,6 @@ struct MachineClass {
no_floppy:1,
no_cdrom:1,
no_sdcard:1,
has_dynamic_sysbus:1,
pci_allow_0_address:1,
legacy_fw_cfg_order:1;
int is_default;
@ -197,6 +201,7 @@ struct MachineClass {
bool ignore_memory_transaction_failures;
int numa_mem_align_shift;
const char **valid_cpu_types;
strList *allowed_dynamic_sysbus_devices;
bool auto_enable_numa_with_memhp;
void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size);

View File

@ -47,6 +47,10 @@
#define NVDIMM_CLASS(oc) OBJECT_CLASS_CHECK(NVDIMMClass, (oc), TYPE_NVDIMM)
#define NVDIMM_GET_CLASS(obj) OBJECT_GET_CLASS(NVDIMMClass, (obj), \
TYPE_NVDIMM)
#define NVDIMM_LABLE_SIZE_PROP "label-size"
#define NVDIMM_UNARMED_PROP "unarmed"
struct NVDIMMDevice {
/* private */
PCDIMMDevice parent_obj;
@ -71,6 +75,14 @@ struct NVDIMMDevice {
* guest via ACPI NFIT and _FIT method if NVDIMM hotplug is supported.
*/
MemoryRegion nvdimm_mr;
/*
* The 'on' value results in the unarmed flag set in ACPI NFIT,
* which can be used to notify guest implicitly that the host
* backend (e.g., files on HDD, /dev/pmemX, etc.) cannot guarantee
* the guest write persistence.
*/
bool unarmed;
};
typedef struct NVDIMMDevice NVDIMMDevice;

View File

@ -286,6 +286,7 @@ DeviceState *qdev_try_create(BusState *bus, const char *name);
void qdev_init_nofail(DeviceState *dev);
void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
int required_for_version);
HotplugHandler *qdev_get_machine_hotplug_handler(DeviceState *dev);
HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev);
void qdev_unplug(DeviceState *dev, Error **errp);
void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev,

View File

@ -16,6 +16,7 @@
#define F_SEAL_WRITE 0x0008 /* prevent writes */
#endif
int qemu_memfd_create(const char *name, size_t size, unsigned int seals);
void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
int *fd);
void qemu_memfd_free(void *ptr, size_t size, int fd);

View File

@ -1570,6 +1570,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
struct Object *owner,
const char *name,
uint64_t size,
uint64_t align,
bool share,
const char *path,
Error **errp)
@ -1578,6 +1579,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
mr->ram = true;
mr->terminates = true;
mr->destructor = memory_region_destructor_ram;
mr->align = align;
mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp);
mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
}

2
numa.c
View File

@ -456,7 +456,7 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
if (mem_path) {
#ifdef __linux__
Error *err = NULL;
memory_region_init_ram_from_file(mr, owner, name, ram_size, false,
memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, false,
mem_path, &err);
if (err) {
error_report_err(err);

View File

@ -613,28 +613,33 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
if (bus) {
qdev_set_parent_bus(dev, bus);
} else if (qdev_hotplug && !qdev_get_machine_hotplug_handler(dev)) {
/* No bus, no machine hotplug handler --> device is not hotpluggable */
error_setg(&err, "Device '%s' can not be hotplugged on this machine",
driver);
goto err_del_dev;
}
qdev_set_id(dev, qemu_opts_id(opts));
/* set properties */
if (qemu_opt_foreach(opts, set_property, dev, &err)) {
error_propagate(errp, err);
object_unparent(OBJECT(dev));
object_unref(OBJECT(dev));
return NULL;
goto err_del_dev;
}
dev->opts = opts;
object_property_set_bool(OBJECT(dev), true, "realized", &err);
if (err != NULL) {
error_propagate(errp, err);
dev->opts = NULL;
object_unparent(OBJECT(dev));
object_unref(OBJECT(dev));
return NULL;
goto err_del_dev;
}
return dev;
err_del_dev:
error_propagate(errp, err);
object_unparent(OBJECT(dev));
object_unref(OBJECT(dev));
return NULL;
}

View File

@ -169,7 +169,9 @@ ETEXI
DEF("numa", HAS_ARG, QEMU_OPTION_numa,
"-numa node[,mem=size][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
"-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node]\n"
"-numa dist,src=source,dst=destination,val=distance\n", QEMU_ARCH_ALL)
"-numa dist,src=source,dst=destination,val=distance\n"
"-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n",
QEMU_ARCH_ALL)
STEXI
@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
@itemx -numa node[,memdev=@var{id}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}]
@ -3972,18 +3974,24 @@ property must be set. These objects are placed in the
@table @option
@item -object memory-backend-file,id=@var{id},size=@var{size},mem-path=@var{dir},share=@var{on|off},discard-data=@var{on|off}
@item -object memory-backend-file,id=@var{id},size=@var{size},mem-path=@var{dir},share=@var{on|off},discard-data=@var{on|off},merge=@var{on|off},dump=@var{on|off},prealloc=@var{on|off},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave},align=@var{align}
Creates a memory file backend object, which can be used to back
the guest RAM with huge pages. The @option{id} parameter is a
unique ID that will be used to reference this memory region
when configuring the @option{-numa} argument. The @option{size}
option provides the size of the memory region, and accepts
common suffixes, eg @option{500M}. The @option{mem-path} provides
the path to either a shared memory or huge page filesystem mount.
the guest RAM with huge pages.
The @option{id} parameter is a unique ID that will be used to reference this
memory region when configuring the @option{-numa} argument.
The @option{size} option provides the size of the memory region, and accepts
common suffixes, eg @option{500M}.
The @option{mem-path} provides the path to either a shared memory or huge page
filesystem mount.
The @option{share} boolean option determines whether the memory
region is marked as private to QEMU, or shared. The latter allows
a co-operating external process to access the QEMU memory region.
Setting the @option{discard-data} boolean option to @var{on}
indicates that file contents can be destroyed when QEMU exits,
to avoid unnecessarily flushing data to the backing file. Note
@ -3991,6 +3999,48 @@ that @option{discard-data} is only an optimization, and QEMU
might not discard file contents if it aborts unexpectedly or is
terminated using SIGKILL.
The @option{merge} boolean option enables memory merge, also known as
MADV_MERGEABLE, so that Kernel Samepage Merging will consider the pages for
memory deduplication.
Setting the @option{dump} boolean option to @var{off} excludes the memory from
core dumps. This feature is also known as MADV_DONTDUMP.
The @option{prealloc} boolean option enables memory preallocation.
The @option{host-nodes} option binds the memory range to a list of NUMA host
nodes.
The @option{policy} option sets the NUMA policy to one of the following values:
@table @option
@item @var{default}
default host policy
@item @var{preferred}
prefer the given host node list for allocation
@item @var{bind}
restrict memory allocation to the given host node list
@item @var{interleave}
interleave memory allocations across the given host node list
@end table
The @option{align} option specifies the base address alignment used when
QEMU calls mmap(2) on @option{mem-path}, and accepts common suffixes, eg
@option{2M}. Some backend stores specified by @option{mem-path}
require an alignment different from the default one used by QEMU, eg
the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In
such cases, users can specify the required alignment via this option.
@item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave}
Creates a memory backend object, which can be used to back the guest RAM.
Memory backend objects offer more control than the @option{-m} option that is
traditionally used to define guest RAM. Please refer to
@option{memory-backend-file} for a description of the options.
@item -object rng-random,id=@var{id},filename=@var{/dev/random}
Creates a random number generator backend which obtains entropy from

View File

@ -207,11 +207,9 @@ ERROR_WHITELIST = [
# Known crashes will generate error messages, but won't be fatal.
# Those entries must be removed once we fix the crashes.
{'exitcode':-6, 'log':r"Device 'serial0' is in use", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"spapr_rtas_register: Assertion .*rtas_table\[token\]\.name.* failed", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"qemu_net_client_setup: Assertion `!peer->peer' failed", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r'RAMBlock "[\w.-]+" already registered', 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"find_ram_offset: Assertion `size != 0' failed.", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"puv3_load_kernel: Assertion `kernel_filename != NULL' failed", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"add_cpreg_to_hashtable: code should not be reached", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"qemu_alloc_display: Assertion `surface->image != NULL' failed", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"Unexpected error in error_set_from_qdev_prop_error", 'loglevel':logging.ERROR},
@ -219,16 +217,10 @@ ERROR_WHITELIST = [
{'exitcode':-6, 'log':r"Object .* is not an instance of type generic-pc-machine", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"Object .* is not an instance of type e500-ccsr", 'loglevel':logging.ERROR},
{'exitcode':-6, 'log':r"vmstate_register_with_alias_id: Assertion `!se->compat \|\| se->instance_id == 0' failed", 'loglevel':logging.ERROR},
{'exitcode':-11, 'device':'stm32f205-soc', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'xlnx,zynqmp', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'mips-cps', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'gus', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'a9mpcore_priv', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'a15mpcore_priv', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'isa-serial', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'sb16', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'cs4231a', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'device':'arm-gicv3', 'loglevel':logging.ERROR, 'expected':True},
{'exitcode':-11, 'machine':'isapc', 'device':'.*-iommu', 'loglevel':logging.ERROR, 'expected':True},
# everything else (including SIGABRT and SIGSEGV) will be a fatal error:

View File

@ -27,8 +27,6 @@
#include "qemu/osdep.h"
#include <glib/gprintf.h>
#include "qemu/memfd.h"
#if defined CONFIG_LINUX && !defined CONFIG_MEMFD
@ -53,6 +51,38 @@ static int memfd_create(const char *name, unsigned int flags)
#define MFD_ALLOW_SEALING 0x0002U
#endif
/*
 * Create a memfd named @name, resize it to @size bytes and, when @seals
 * is non-zero, apply those seals to it.
 *
 * Returns the new file descriptor on success and -1 on failure. A failing
 * ftruncate() or fcntl() is reported with perror() and the descriptor is
 * closed before returning. When CONFIG_LINUX is not defined the function
 * does nothing and always returns -1.
 */
int qemu_memfd_create(const char *name, size_t size, unsigned int seals)
{
#ifdef CONFIG_LINUX
    /* Sealing support is only requested when seals are actually wanted. */
    int fd = memfd_create(name,
                          seals ? MFD_CLOEXEC | MFD_ALLOW_SEALING
                                : MFD_CLOEXEC);

    if (fd < 0) {
        return -1;
    }

    if (ftruncate(fd, size) == -1) {
        perror("ftruncate");
    } else if (seals && fcntl(fd, F_ADD_SEALS, seals) == -1) {
        perror("fcntl");
    } else {
        return fd;
    }

    /* Shared failure path: drop the half-initialized descriptor. */
    close(fd);
    return -1;
#else
    return -1;
#endif
}
/*
* This is a best-effort helper for shared memory allocation, with
* optional sealing. The helper will do his best to allocate using
@ -63,35 +93,14 @@ void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
int *fd)
{
void *ptr;
int mfd = -1;
int mfd = qemu_memfd_create(name, size, seals);
*fd = -1;
#ifdef CONFIG_LINUX
if (seals) {
mfd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
/* some systems have memfd without sealing */
if (mfd == -1) {
mfd = qemu_memfd_create(name, size, 0);
}
if (mfd == -1) {
/* some systems have memfd without sealing */
mfd = memfd_create(name, MFD_CLOEXEC);
seals = 0;
}
#endif
if (mfd != -1) {
if (ftruncate(mfd, size) == -1) {
perror("ftruncate");
close(mfd);
return NULL;
}
if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) {
perror("fcntl");
close(mfd);
return NULL;
}
} else {
const char *tmpdir = g_get_tmp_dir();
gchar *fname;

3
vl.c
View File

@ -4611,8 +4611,6 @@ int main(int argc, char **argv, char **envp)
current_machine->boot_order = boot_order;
current_machine->cpu_model = cpu_model;
parse_numa_opts(current_machine);
/* parse features once if machine provides default cpu_type */
if (machine_class->default_cpu_type) {
current_machine->cpu_type = machine_class->default_cpu_type;
@ -4621,6 +4619,7 @@ int main(int argc, char **argv, char **envp)
cpu_parse_cpu_model(machine_class->default_cpu_type, cpu_model);
}
}
parse_numa_opts(current_machine);
machine_run_board_init(current_machine);