numa: add -numa node,memdev= option
This option provides the infrastructure for binding guest NUMA nodes
to host NUMA nodes. For example:

    -object memory-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 \
    -numa node,nodeid=0,cpus=0,memdev=ram-node0 \
    -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \
    -numa node,nodeid=1,cpus=1,memdev=ram-node1

The option replaces "-numa node,mem=".

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>

MST: conflict resolution
This commit is contained in:
parent
1f21772db0
commit
7febe36f9a
|
@ -149,6 +149,7 @@ extern int nb_numa_nodes;
|
||||||
typedef struct node_info {
|
typedef struct node_info {
|
||||||
uint64_t node_mem;
|
uint64_t node_mem;
|
||||||
DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
|
DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
|
||||||
|
struct HostMemoryBackend *node_memdev;
|
||||||
} NodeInfo;
|
} NodeInfo;
|
||||||
extern NodeInfo numa_info[MAX_NODES];
|
extern NodeInfo numa_info[MAX_NODES];
|
||||||
void set_numa_nodes(void);
|
void set_numa_nodes(void);
|
||||||
|
|
63
numa.c
63
numa.c
|
@ -33,6 +33,7 @@
|
||||||
#include "qapi/dealloc-visitor.h"
|
#include "qapi/dealloc-visitor.h"
|
||||||
#include "qapi/qmp/qerror.h"
|
#include "qapi/qmp/qerror.h"
|
||||||
#include "hw/boards.h"
|
#include "hw/boards.h"
|
||||||
|
#include "sysemu/hostmem.h"
|
||||||
|
|
||||||
QemuOptsList qemu_numa_opts = {
|
QemuOptsList qemu_numa_opts = {
|
||||||
.name = "numa",
|
.name = "numa",
|
||||||
|
@ -41,6 +42,8 @@ QemuOptsList qemu_numa_opts = {
|
||||||
.desc = { { 0 } } /* validated with OptsVisitor */
|
.desc = { { 0 } } /* validated with OptsVisitor */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int have_memdevs = -1;
|
||||||
|
|
||||||
static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
|
static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
|
||||||
{
|
{
|
||||||
uint16_t nodenr;
|
uint16_t nodenr;
|
||||||
|
@ -67,6 +70,20 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
|
||||||
bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
|
bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (node->has_mem && node->has_memdev) {
|
||||||
|
error_setg(errp, "qemu: cannot specify both mem= and memdev=\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (have_memdevs == -1) {
|
||||||
|
have_memdevs = node->has_memdev;
|
||||||
|
}
|
||||||
|
if (node->has_memdev != have_memdevs) {
|
||||||
|
error_setg(errp, "qemu: memdev option must be specified for either "
|
||||||
|
"all or no nodes\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (node->has_mem) {
|
if (node->has_mem) {
|
||||||
uint64_t mem_size = node->mem;
|
uint64_t mem_size = node->mem;
|
||||||
const char *mem_str = qemu_opt_get(opts, "mem");
|
const char *mem_str = qemu_opt_get(opts, "mem");
|
||||||
|
@ -76,6 +93,18 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
|
||||||
}
|
}
|
||||||
numa_info[nodenr].node_mem = mem_size;
|
numa_info[nodenr].node_mem = mem_size;
|
||||||
}
|
}
|
||||||
|
if (node->has_memdev) {
|
||||||
|
Object *o;
|
||||||
|
o = object_resolve_path_type(node->memdev, TYPE_MEMORY_BACKEND, NULL);
|
||||||
|
if (!o) {
|
||||||
|
error_setg(errp, "memdev=%s is ambiguous", node->memdev);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
object_ref(o);
|
||||||
|
numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL);
|
||||||
|
numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int numa_init_func(QemuOpts *opts, void *opaque)
|
int numa_init_func(QemuOpts *opts, void *opaque)
|
||||||
|
@ -195,10 +224,42 @@ void set_numa_modes(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
|
static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
|
||||||
const char *name,
|
const char *name,
|
||||||
uint64_t ram_size)
|
uint64_t ram_size)
|
||||||
{
|
{
|
||||||
memory_region_init_ram(mr, owner, name, ram_size);
|
memory_region_init_ram(mr, owner, name, ram_size);
|
||||||
vmstate_register_ram_global(mr);
|
vmstate_register_ram_global(mr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
|
||||||
|
const char *name,
|
||||||
|
uint64_t ram_size)
|
||||||
|
{
|
||||||
|
uint64_t addr = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (nb_numa_nodes == 0 || !have_memdevs) {
|
||||||
|
allocate_system_memory_nonnuma(mr, owner, name, ram_size);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
memory_region_init(mr, owner, name, ram_size);
|
||||||
|
for (i = 0; i < MAX_NODES; i++) {
|
||||||
|
Error *local_err = NULL;
|
||||||
|
uint64_t size = numa_info[i].node_mem;
|
||||||
|
HostMemoryBackend *backend = numa_info[i].node_memdev;
|
||||||
|
if (!backend) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
MemoryRegion *seg = host_memory_backend_get_memory(backend, &local_err);
|
||||||
|
if (local_err) {
|
||||||
|
qerror_report_err(local_err);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
memory_region_add_subregion(mr, addr, seg);
|
||||||
|
vmstate_register_ram_global(seg);
|
||||||
|
addr += size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -3119,8 +3119,12 @@
|
||||||
# @cpus: #optional VCPUs belonging to this node (assign VCPUS round-robin
|
# @cpus: #optional VCPUs belonging to this node (assign VCPUS round-robin
|
||||||
# if omitted)
|
# if omitted)
|
||||||
#
|
#
|
||||||
# @mem: #optional memory size of this node (equally divide total memory among
|
# @mem: #optional memory size of this node; mutually exclusive with @memdev.
|
||||||
# nodes if omitted)
|
# Equally divide total memory among nodes if both @mem and @memdev are
|
||||||
|
# omitted.
|
||||||
|
#
|
||||||
|
# @memdev: #optional memory backend object. If specified for one node,
|
||||||
|
# it must be specified for all nodes.
|
||||||
#
|
#
|
||||||
# Since: 2.1
|
# Since: 2.1
|
||||||
##
|
##
|
||||||
|
@ -3128,4 +3132,5 @@
|
||||||
'data': {
|
'data': {
|
||||||
'*nodeid': 'uint16',
|
'*nodeid': 'uint16',
|
||||||
'*cpus': ['uint16'],
|
'*cpus': ['uint16'],
|
||||||
'*mem': 'size' }}
|
'*mem': 'size',
|
||||||
|
'*memdev': 'str' }}
|
||||||
|
|
|
@ -95,16 +95,22 @@ specifies the maximum number of hotpluggable CPUs.
|
||||||
ETEXI
|
ETEXI
|
||||||
|
|
||||||
DEF("numa", HAS_ARG, QEMU_OPTION_numa,
|
DEF("numa", HAS_ARG, QEMU_OPTION_numa,
|
||||||
"-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
|
"-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n"
|
||||||
|
"-numa node[,memdev=id][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
|
||||||
STEXI
|
STEXI
|
||||||
@item -numa node[,mem=@var{size}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}]
|
@item -numa node[,mem=@var{size}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}]
|
||||||
|
@item -numa node[,memdev=@var{id}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}]
|
||||||
@findex -numa
|
@findex -numa
|
||||||
Simulate a multi node NUMA system. If @samp{mem}
|
Simulate a multi node NUMA system. If @samp{mem}, @samp{memdev}
|
||||||
and @samp{cpus} are omitted, resources are split equally. Also, note
|
and @samp{cpus} are omitted, resources are split equally. Also, note
|
||||||
that the -@option{numa} option doesn't allocate any of the specified
|
that the -@option{numa} option doesn't allocate any of the specified
|
||||||
resources. That is, it just assigns existing resources to NUMA nodes. This
|
resources. That is, it just assigns existing resources to NUMA nodes. This
|
||||||
means that one still has to use the @option{-m}, @option{-smp} options
|
means that one still has to use the @option{-m}, @option{-smp} options
|
||||||
to allocate RAM and VCPUs respectively.
|
to allocate RAM and VCPUs respectively, and possibly @option{-object}
|
||||||
|
to specify the memory backend for the @samp{memdev} suboption.
|
||||||
|
|
||||||
|
@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, if one
|
||||||
|
node uses @samp{memdev}, all of them have to use it.
|
||||||
ETEXI
|
ETEXI
|
||||||
|
|
||||||
DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,
|
DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,
|
||||||
|
|
4
vl.c
4
vl.c
|
@ -3952,6 +3952,8 @@ int main(int argc, char **argv, char **envp)
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cpu_exec_init_all();
|
||||||
|
|
||||||
current_machine = MACHINE(object_new(object_class_get_name(
|
current_machine = MACHINE(object_new(object_class_get_name(
|
||||||
OBJECT_CLASS(machine_class))));
|
OBJECT_CLASS(machine_class))));
|
||||||
object_property_add_child(object_get_root(), "machine",
|
object_property_add_child(object_get_root(), "machine",
|
||||||
|
@ -4289,8 +4291,6 @@ int main(int argc, char **argv, char **envp)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cpu_exec_init_all();
|
|
||||||
|
|
||||||
blk_mig_init();
|
blk_mig_init();
|
||||||
ram_mig_init();
|
ram_mig_init();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue