numa: add -numa node,memdev= option

This option provides the infrastructure for binding guest NUMA nodes
to host NUMA nodes.  For example:

 -object memory-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 \
 -numa node,nodeid=0,cpus=0,memdev=ram-node0 \
 -object memory-ram,size=1024M,policy=interleave,host-nodes=1-3,id=ram-node1 \
 -numa node,nodeid=1,cpus=1,memdev=ram-node1

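The option is an alternative to "-numa node,mem=": the two cannot be
combined on the same node, and memdev= must be given either for all
nodes or for none.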

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Hu Tao <hutao@cn.fujitsu.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>

MST: conflict resolution
This commit is contained in:
Paolo Bonzini 2014-05-14 17:43:17 +08:00 committed by Michael S. Tsirkin
parent 1f21772db0
commit 7febe36f9a
5 changed files with 84 additions and 11 deletions

View File

@ -149,6 +149,7 @@ extern int nb_numa_nodes;
typedef struct node_info { typedef struct node_info {
uint64_t node_mem; uint64_t node_mem;
DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS); DECLARE_BITMAP(node_cpu, MAX_CPUMASK_BITS);
struct HostMemoryBackend *node_memdev;
} NodeInfo; } NodeInfo;
extern NodeInfo numa_info[MAX_NODES]; extern NodeInfo numa_info[MAX_NODES];
void set_numa_nodes(void); void set_numa_nodes(void);

67
numa.c
View File

@ -33,6 +33,7 @@
#include "qapi/dealloc-visitor.h" #include "qapi/dealloc-visitor.h"
#include "qapi/qmp/qerror.h" #include "qapi/qmp/qerror.h"
#include "hw/boards.h" #include "hw/boards.h"
#include "sysemu/hostmem.h"
QemuOptsList qemu_numa_opts = { QemuOptsList qemu_numa_opts = {
.name = "numa", .name = "numa",
@ -41,6 +42,8 @@ QemuOptsList qemu_numa_opts = {
.desc = { { 0 } } /* validated with OptsVisitor */ .desc = { { 0 } } /* validated with OptsVisitor */
}; };
static int have_memdevs = -1;
static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp) static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
{ {
uint16_t nodenr; uint16_t nodenr;
@ -67,6 +70,20 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1); bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
} }
if (node->has_mem && node->has_memdev) {
error_setg(errp, "qemu: cannot specify both mem= and memdev=\n");
return;
}
if (have_memdevs == -1) {
have_memdevs = node->has_memdev;
}
if (node->has_memdev != have_memdevs) {
error_setg(errp, "qemu: memdev option must be specified for either "
"all or no nodes\n");
return;
}
if (node->has_mem) { if (node->has_mem) {
uint64_t mem_size = node->mem; uint64_t mem_size = node->mem;
const char *mem_str = qemu_opt_get(opts, "mem"); const char *mem_str = qemu_opt_get(opts, "mem");
@ -76,6 +93,18 @@ static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
} }
numa_info[nodenr].node_mem = mem_size; numa_info[nodenr].node_mem = mem_size;
} }
if (node->has_memdev) {
Object *o;
o = object_resolve_path_type(node->memdev, TYPE_MEMORY_BACKEND, NULL);
if (!o) {
error_setg(errp, "memdev=%s is ambiguous", node->memdev);
return;
}
object_ref(o);
numa_info[nodenr].node_mem = object_property_get_int(o, "size", NULL);
numa_info[nodenr].node_memdev = MEMORY_BACKEND(o);
}
} }
int numa_init_func(QemuOpts *opts, void *opaque) int numa_init_func(QemuOpts *opts, void *opaque)
@ -195,10 +224,42 @@ void set_numa_modes(void)
} }
} }
void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner, static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
const char *name, const char *name,
uint64_t ram_size) uint64_t ram_size)
{ {
memory_region_init_ram(mr, owner, name, ram_size); memory_region_init_ram(mr, owner, name, ram_size);
vmstate_register_ram_global(mr); vmstate_register_ram_global(mr);
} }
/*
 * Initialize @mr as the guest's system memory.
 *
 * When no NUMA nodes are configured, or the nodes were not given memdev=
 * backends, this defers to allocate_system_memory_nonnuma().
 *
 * Otherwise @mr is initialized as a plain region of @ram_size bytes and the
 * memory region of each node's backend is added to it as a subregion, in
 * node index order.  Each subregion starts at the sum of node_mem of the
 * preceding nodes that have a backend; nodes without a backend are skipped.
 * Every backend region is also registered with vmstate_register_ram_global().
 *
 * A failure to obtain a backend's memory region is reported and ends the
 * process with exit(1).
 */
void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
                                          const char *name,
                                          uint64_t ram_size)
{
    uint64_t offset = 0;
    int node;

    /* No memdev-backed NUMA layout: allocate one flat RAM region. */
    if (!have_memdevs || nb_numa_nodes == 0) {
        allocate_system_memory_nonnuma(mr, owner, name, ram_size);
        return;
    }

    memory_region_init(mr, owner, name, ram_size);

    for (node = 0; node < MAX_NODES; node++) {
        HostMemoryBackend *backend = numa_info[node].node_memdev;
        Error *err = NULL;
        MemoryRegion *seg;

        if (backend == NULL) {
            continue;
        }

        seg = host_memory_backend_get_memory(backend, &err);
        if (err != NULL) {
            qerror_report_err(err);
            exit(1);
        }

        memory_region_add_subregion(mr, offset, seg);
        vmstate_register_ram_global(seg);

        /* The next node's memory follows directly after this one. */
        offset += numa_info[node].node_mem;
    }
}

View File

@ -3119,8 +3119,12 @@
# @cpus: #optional VCPUs belonging to this node (assign VCPUS round-robin # @cpus: #optional VCPUs belonging to this node (assign VCPUS round-robin
# if omitted) # if omitted)
# #
# @mem: #optional memory size of this node (equally divide total memory among # @mem: #optional memory size of this node; mutually exclusive with @memdev.
# nodes if omitted) # Equally divide total memory among nodes if both @mem and @memdev are
# omitted.
#
# @memdev: #optional memory backend object. If specified for one node,
# it must be specified for all nodes.
# #
# Since: 2.1 # Since: 2.1
## ##
@ -3128,4 +3132,5 @@
'data': { 'data': {
'*nodeid': 'uint16', '*nodeid': 'uint16',
'*cpus': ['uint16'], '*cpus': ['uint16'],
'*mem': 'size' }} '*mem': 'size',
'*memdev': 'str' }}

View File

@ -95,16 +95,22 @@ specifies the maximum number of hotpluggable CPUs.
ETEXI ETEXI
DEF("numa", HAS_ARG, QEMU_OPTION_numa, DEF("numa", HAS_ARG, QEMU_OPTION_numa,
"-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL) "-numa node[,mem=size][,cpus=cpu[-cpu]][,nodeid=node]\n"
"-numa node[,memdev=id][,cpus=cpu[-cpu]][,nodeid=node]\n", QEMU_ARCH_ALL)
STEXI STEXI
@item -numa node[,mem=@var{size}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}] @item -numa node[,mem=@var{size}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}]
@item -numa node[,memdev=@var{id}][,cpus=@var{cpu[-cpu]}][,nodeid=@var{node}]
@findex -numa @findex -numa
Simulate a multi node NUMA system. If @samp{mem} Simulate a multi node NUMA system. If @samp{mem}, @samp{memdev}
and @samp{cpus} are omitted, resources are split equally. Also, note and @samp{cpus} are omitted, resources are split equally. Also, note
that the @option{-numa} option doesn't allocate any of the specified that the @option{-numa} option doesn't allocate any of the specified
resources. That is, it just assigns existing resources to NUMA nodes. This resources. That is, it just assigns existing resources to NUMA nodes. This
means that one still has to use the @option{-m}, @option{-smp} options means that one still has to use the @option{-m}, @option{-smp} options
to allocate RAM and VCPUs respectively. to allocate RAM and VCPUs respectively, and possibly @option{-object}
to specify the memory backend for the @samp{memdev} suboption.
@samp{mem} and @samp{memdev} are mutually exclusive. Furthermore, if one
node uses @samp{memdev}, all of them have to use it.
ETEXI ETEXI
DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd, DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,

4
vl.c
View File

@ -3952,6 +3952,8 @@ int main(int argc, char **argv, char **envp)
exit(1); exit(1);
} }
cpu_exec_init_all();
current_machine = MACHINE(object_new(object_class_get_name( current_machine = MACHINE(object_new(object_class_get_name(
OBJECT_CLASS(machine_class)))); OBJECT_CLASS(machine_class))));
object_property_add_child(object_get_root(), "machine", object_property_add_child(object_get_root(), "machine",
@ -4289,8 +4291,6 @@ int main(int argc, char **argv, char **envp)
} }
} }
cpu_exec_init_all();
blk_mig_init(); blk_mig_init();
ram_mig_init(); ram_mig_init();