util: Introduce ThreadContext user-creatable object
Setting the CPU affinity of QEMU threads is a bit problematic, because
QEMU doesn't always have permissions to set the CPU affinity itself,
for example, with seccomp after initialized by QEMU:
-sandbox enable=on,resourcecontrol=deny
General information about CPU affinities can be found in the man page of
taskset:
CPU affinity is a scheduler property that "bonds" a process to a given
set of CPUs on the system. The Linux scheduler will honor the given CPU
affinity and the process will not run on any other CPUs.
While upper layers are already aware of how to handle CPU affinities for
long-lived threads like iothreads or vcpu threads, especially short-lived
threads, as used for memory-backend preallocation, are more involved to
handle. These threads are created on demand and upper layers are not even
able to identify and configure them.
Introduce the concept of a ThreadContext, that is essentially a thread
used for creating new threads. All threads created via that context
thread inherit the configured CPU affinity. Consequently, it's
sufficient to create a ThreadContext and configure it once, and have all
threads created via that ThreadContext inherit the same CPU affinity.
The CPU affinity of a ThreadContext can be configured two ways:
(1) Obtaining the thread id via the "thread-id" property and setting the
CPU affinity manually (e.g., via taskset).
(2) Setting the "cpu-affinity" property and letting QEMU try to set the
CPU affinity itself. This will fail if QEMU doesn't have permissions
to do so anymore after seccomp was initialized.
A simple QEMU example to set the CPU affinity to host CPU 0,1,6,7 would be:
qemu-system-x86_64 -S \
-object thread-context,id=tc1,cpu-affinity=0-1,cpu-affinity=6-7
And we can query it via HMP/QMP:
(qemu) qom-get tc1 cpu-affinity
[
0,
1,
6,
7
]
But note that due to dynamic library loading this example will not work
before we actually make use of thread_context_create_thread() in QEMU
code, because the type will otherwise not get registered. We'll wire
this up next to make it work.
In general, the interface behaves like pthread_setaffinity_np(): host
CPU numbers that are currently not available are ignored; only host CPU
numbers that are impossible with the current kernel will fail. If the
list of host CPU numbers does not include a single CPU that is
available, setting the CPU affinity will fail.
A ThreadContext can be reused, simply by reconfiguring the CPU affinity.
Note that the CPU affinity of previously created threads will not get
adjusted.
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20221014134720.168738-4-david@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2022-10-14 15:47:16 +02:00
|
|
|
/*
|
|
|
|
* QEMU Thread Context
|
|
|
|
*
|
|
|
|
* Copyright Red Hat Inc., 2022
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* David Hildenbrand <david@redhat.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
|
|
* See the COPYING file in the top-level directory.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "qemu/osdep.h"
|
|
|
|
#include "qemu/thread-context.h"
|
|
|
|
#include "qapi/error.h"
|
|
|
|
#include "qapi/qapi-builtin-visit.h"
|
|
|
|
#include "qapi/visitor.h"
|
|
|
|
#include "qemu/config-file.h"
|
|
|
|
#include "qapi/qapi-builtin-visit.h"
|
|
|
|
#include "qom/object_interfaces.h"
|
|
|
|
#include "qemu/module.h"
|
|
|
|
#include "qemu/bitmap.h"
|
|
|
|
|
util: Add write-only "node-affinity" property for ThreadContext
Let's make it easier to pin threads created via a ThreadContext to
all host CPUs currently belonging to a given set of host NUMA nodes --
which is the common case.
"node-affinity" is simply a shortcut for setting "cpu-affinity" manually
to the list of host CPUs belonging to the set of host nodes. This property
can only be written.
A simple QEMU example to set the CPU affinity to host node 1 on a system
with two nodes, 24 CPUs each, whereby odd-numbered host CPUs belong to
host node 1:
qemu-system-x86_64 -S \
-object thread-context,id=tc1,node-affinity=1
And we can query the cpu-affinity via HMP/QMP:
(qemu) qom-get tc1 cpu-affinity
[
1,
3,
5,
7,
9,
11,
13,
15,
17,
19,
21,
23,
25,
27,
29,
31,
33,
35,
37,
39,
41,
43,
45,
47
]
We cannot query the node-affinity:
(qemu) qom-get tc1 node-affinity
Error: Insufficient permission to perform this operation
But note that due to dynamic library loading this example will not work
before we actually make use of thread_context_create_thread() in QEMU
code, because the type will otherwise not get registered. We'll wire
this up next to make it work.
Note that if the host CPUs for a host node change due to CPU hot(un)plug or
CPU onlining/offlining (i.e., lscpu output changes) after the ThreadContext
was started, the CPU affinity will not get updated.
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20221014134720.168738-5-david@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2022-10-14 15:47:17 +02:00
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
#include <numa.h>
|
|
|
|
#endif
|
|
|
|
|
util: Introduce ThreadContext user-creatable object
Setting the CPU affinity of QEMU threads is a bit problematic, because
QEMU doesn't always have permissions to set the CPU affinity itself,
for example, with seccomp after initialized by QEMU:
-sandbox enable=on,resourcecontrol=deny
General information about CPU affinities can be found in the man page of
taskset:
CPU affinity is a scheduler property that "bonds" a process to a given
set of CPUs on the system. The Linux scheduler will honor the given CPU
affinity and the process will not run on any other CPUs.
While upper layers are already aware of how to handle CPU affinities for
long-lived threads like iothreads or vcpu threads, especially short-lived
threads, as used for memory-backend preallocation, are more involved to
handle. These threads are created on demand and upper layers are not even
able to identify and configure them.
Introduce the concept of a ThreadContext, that is essentially a thread
used for creating new threads. All threads created via that context
thread inherit the configured CPU affinity. Consequently, it's
sufficient to create a ThreadContext and configure it once, and have all
threads created via that ThreadContext inherit the same CPU affinity.
The CPU affinity of a ThreadContext can be configured two ways:
(1) Obtaining the thread id via the "thread-id" property and setting the
CPU affinity manually (e.g., via taskset).
(2) Setting the "cpu-affinity" property and letting QEMU try to set the
CPU affinity itself. This will fail if QEMU doesn't have permissions
to do so anymore after seccomp was initialized.
A simple QEMU example to set the CPU affinity to host CPU 0,1,6,7 would be:
qemu-system-x86_64 -S \
-object thread-context,id=tc1,cpu-affinity=0-1,cpu-affinity=6-7
And we can query it via HMP/QMP:
(qemu) qom-get tc1 cpu-affinity
[
0,
1,
6,
7
]
But note that due to dynamic library loading this example will not work
before we actually make use of thread_context_create_thread() in QEMU
code, because the type will otherwise not get registered. We'll wire
this up next to make it work.
In general, the interface behaves like pthread_setaffinity_np(): host
CPU numbers that are currently not available are ignored; only host CPU
numbers that are impossible with the current kernel will fail. If the
list of host CPU numbers does not include a single CPU that is
available, setting the CPU affinity will fail.
A ThreadContext can be reused, simply by reconfiguring the CPU affinity.
Note that the CPU affinity of previously created threads will not get
adjusted.
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20221014134720.168738-4-david@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2022-10-14 15:47:16 +02:00
|
|
|
/* Commands the context thread is asked to carry out via tc->thread_cmd. */
enum {
    /* Nothing pending; also the value written back after a command ran. */
    TC_CMD_NONE = 0,
    /* Ask the context thread to return from its run loop. */
    TC_CMD_STOP = 1,
    /* Ask the context thread to create a new thread on our behalf. */
    TC_CMD_NEW  = 2,
};
|
|
|
|
|
|
|
|
typedef struct ThreadContextCmdNew {
|
|
|
|
QemuThread *thread;
|
|
|
|
const char *name;
|
|
|
|
void *(*start_routine)(void *);
|
|
|
|
void *arg;
|
|
|
|
int mode;
|
|
|
|
} ThreadContextCmdNew;
|
|
|
|
|
|
|
|
static void *thread_context_run(void *opaque)
|
|
|
|
{
|
|
|
|
ThreadContext *tc = opaque;
|
|
|
|
|
|
|
|
tc->thread_id = qemu_get_thread_id();
|
|
|
|
qemu_sem_post(&tc->sem);
|
|
|
|
|
|
|
|
while (true) {
|
|
|
|
/*
|
|
|
|
* Threads inherit the CPU affinity of the creating thread. For this
|
|
|
|
* reason, we create new (especially short-lived) threads from our
|
|
|
|
* persistent context thread.
|
|
|
|
*
|
|
|
|
* Especially when QEMU is not allowed to set the affinity itself,
|
|
|
|
* management tools can simply set the affinity of the context thread
|
|
|
|
* after creating the context, to have new threads created via
|
|
|
|
* the context inherit the CPU affinity automatically.
|
|
|
|
*/
|
|
|
|
switch (tc->thread_cmd) {
|
|
|
|
case TC_CMD_NONE:
|
|
|
|
break;
|
|
|
|
case TC_CMD_STOP:
|
|
|
|
tc->thread_cmd = TC_CMD_NONE;
|
|
|
|
qemu_sem_post(&tc->sem);
|
|
|
|
return NULL;
|
|
|
|
case TC_CMD_NEW: {
|
|
|
|
ThreadContextCmdNew *cmd_new = tc->thread_cmd_data;
|
|
|
|
|
|
|
|
qemu_thread_create(cmd_new->thread, cmd_new->name,
|
|
|
|
cmd_new->start_routine, cmd_new->arg,
|
|
|
|
cmd_new->mode);
|
|
|
|
tc->thread_cmd = TC_CMD_NONE;
|
|
|
|
tc->thread_cmd_data = NULL;
|
|
|
|
qemu_sem_post(&tc->sem);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
g_assert_not_reached();
|
|
|
|
}
|
|
|
|
qemu_sem_wait(&tc->sem_thread);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void thread_context_set_cpu_affinity(Object *obj, Visitor *v,
|
|
|
|
const char *name, void *opaque,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
ThreadContext *tc = THREAD_CONTEXT(obj);
|
|
|
|
uint16List *l, *host_cpus = NULL;
|
|
|
|
unsigned long *bitmap = NULL;
|
|
|
|
int nbits = 0, ret;
|
|
|
|
|
util: Add write-only "node-affinity" property for ThreadContext
Let's make it easier to pin threads created via a ThreadContext to
all host CPUs currently belonging to a given set of host NUMA nodes --
which is the common case.
"node-affinity" is simply a shortcut for setting "cpu-affinity" manually
to the list of host CPUs belonging to the set of host nodes. This property
can only be written.
A simple QEMU example to set the CPU affinity to host node 1 on a system
with two nodes, 24 CPUs each, whereby odd-numbered host CPUs belong to
host node 1:
qemu-system-x86_64 -S \
-object thread-context,id=tc1,node-affinity=1
And we can query the cpu-affinity via HMP/QMP:
(qemu) qom-get tc1 cpu-affinity
[
1,
3,
5,
7,
9,
11,
13,
15,
17,
19,
21,
23,
25,
27,
29,
31,
33,
35,
37,
39,
41,
43,
45,
47
]
We cannot query the node-affinity:
(qemu) qom-get tc1 node-affinity
Error: Insufficient permission to perform this operation
But note that due to dynamic library loading this example will not work
before we actually make use of thread_context_create_thread() in QEMU
code, because the type will otherwise not get registered. We'll wire
this up next to make it work.
Note that if the host CPUs for a host node change due do CPU hot(un)plug
CPU onlining/offlining (i.e., lscpu output changes) after the ThreadContext
was started, the CPU affinity will not get updated.
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20221014134720.168738-5-david@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2022-10-14 15:47:17 +02:00
|
|
|
if (tc->init_cpu_bitmap) {
|
|
|
|
error_setg(errp, "Mixing CPU and node affinity not supported");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2022-11-21 09:50:53 +01:00
|
|
|
if (!visit_type_uint16List(v, name, &host_cpus, errp)) {
|
util: Introduce ThreadContext user-creatable object
Setting the CPU affinity of QEMU threads is a bit problematic, because
QEMU doesn't always have permissions to set the CPU affinity itself,
for example, with seccomp after initialized by QEMU:
-sandbox enable=on,resourcecontrol=deny
General information about CPU affinities can be found in the man page of
taskset:
CPU affinity is a scheduler property that "bonds" a process to a given
set of CPUs on the system. The Linux scheduler will honor the given CPU
affinity and the process will not run on any other CPUs.
While upper layers are already aware of how to handle CPU affinities for
long-lived threads like iothreads or vcpu threads, especially short-lived
threads, as used for memory-backend preallocation, are more involved to
handle. These threads are created on demand and upper layers are not even
able to identify and configure them.
Introduce the concept of a ThreadContext, that is essentially a thread
used for creating new threads. All threads created via that context
thread inherit the configured CPU affinity. Consequently, it's
sufficient to create a ThreadContext and configure it once, and have all
threads created via that ThreadContext inherit the same CPU affinity.
The CPU affinity of a ThreadContext can be configured two ways:
(1) Obtaining the thread id via the "thread-id" property and setting the
CPU affinity manually (e.g., via taskset).
(2) Setting the "cpu-affinity" property and letting QEMU try set the
CPU affinity itself. This will fail if QEMU doesn't have permissions
to do so anymore after seccomp was initialized.
A simple QEMU example to set the CPU affinity to host CPU 0,1,6,7 would be:
qemu-system-x86_64 -S \
-object thread-context,id=tc1,cpu-affinity=0-1,cpu-affinity=6-7
And we can query it via HMP/QMP:
(qemu) qom-get tc1 cpu-affinity
[
0,
1,
6,
7
]
But note that due to dynamic library loading this example will not work
before we actually make use of thread_context_create_thread() in QEMU
code, because the type will otherwise not get registered. We'll wire
this up next to make it work.
In general, the interface behaves like pthread_setaffinity_np(): host
CPU numbers that are currently not available are ignored; only host CPU
numbers that are impossible with the current kernel will fail. If the
list of host CPU numbers does not include a single CPU that is
available, setting the CPU affinity will fail.
A ThreadContext can be reused, simply by reconfiguring the CPU affinity.
Note that the CPU affinity of previously created threads will not get
adjusted.
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20221014134720.168738-4-david@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2022-10-14 15:47:16 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!host_cpus) {
|
|
|
|
error_setg(errp, "CPU list is empty");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (l = host_cpus; l; l = l->next) {
|
|
|
|
nbits = MAX(nbits, l->value + 1);
|
|
|
|
}
|
|
|
|
bitmap = bitmap_new(nbits);
|
|
|
|
for (l = host_cpus; l; l = l->next) {
|
|
|
|
set_bit(l->value, bitmap);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tc->thread_id != -1) {
|
|
|
|
/*
|
|
|
|
* Note: we won't be adjusting the affinity of any thread that is still
|
|
|
|
* around, but only the affinity of the context thread.
|
|
|
|
*/
|
|
|
|
ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
|
|
|
|
if (ret) {
|
|
|
|
error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
tc->init_cpu_bitmap = bitmap;
|
|
|
|
bitmap = NULL;
|
|
|
|
tc->init_cpu_nbits = nbits;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
g_free(bitmap);
|
|
|
|
qapi_free_uint16List(host_cpus);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void thread_context_get_cpu_affinity(Object *obj, Visitor *v,
|
|
|
|
const char *name, void *opaque,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
unsigned long *bitmap, nbits, value;
|
|
|
|
ThreadContext *tc = THREAD_CONTEXT(obj);
|
|
|
|
uint16List *host_cpus = NULL;
|
|
|
|
uint16List **tail = &host_cpus;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (tc->thread_id == -1) {
|
|
|
|
error_setg(errp, "Object not initialized yet");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = qemu_thread_get_affinity(&tc->thread, &bitmap, &nbits);
|
|
|
|
if (ret) {
|
|
|
|
error_setg(errp, "Getting CPU affinity failed: %s", strerror(ret));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
value = find_first_bit(bitmap, nbits);
|
|
|
|
while (value < nbits) {
|
|
|
|
QAPI_LIST_APPEND(tail, value);
|
|
|
|
|
|
|
|
value = find_next_bit(bitmap, nbits, value + 1);
|
|
|
|
}
|
|
|
|
g_free(bitmap);
|
|
|
|
|
|
|
|
visit_type_uint16List(v, name, &host_cpus, errp);
|
|
|
|
qapi_free_uint16List(host_cpus);
|
|
|
|
}
|
|
|
|
|
util: Add write-only "node-affinity" property for ThreadContext
Let's make it easier to pin threads created via a ThreadContext to
all host CPUs currently belonging to a given set of host NUMA nodes --
which is the common case.
"node-affinity" is simply a shortcut for setting "cpu-affinity" manually
to the list of host CPUs belonging to the set of host nodes. This property
can only be written.
A simple QEMU example to set the CPU affinity to host node 1 on a system
with two nodes, 24 CPUs each, whereby odd-numbered host CPUs belong to
host node 1:
qemu-system-x86_64 -S \
-object thread-context,id=tc1,node-affinity=1
And we can query the cpu-affinity via HMP/QMP:
(qemu) qom-get tc1 cpu-affinity
[
1,
3,
5,
7,
9,
11,
13,
15,
17,
19,
21,
23,
25,
27,
29,
31,
33,
35,
37,
39,
41,
43,
45,
47
]
We cannot query the node-affinity:
(qemu) qom-get tc1 node-affinity
Error: Insufficient permission to perform this operation
But note that due to dynamic library loading this example will not work
before we actually make use of thread_context_create_thread() in QEMU
code, because the type will otherwise not get registered. We'll wire
this up next to make it work.
Note that if the host CPUs for a host node change due to CPU hot(un)plug or
CPU onlining/offlining (i.e., lscpu output changes) after the ThreadContext
was started, the CPU affinity will not get updated.
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20221014134720.168738-5-david@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2022-10-14 15:47:17 +02:00
|
|
|
static void thread_context_set_node_affinity(Object *obj, Visitor *v,
|
|
|
|
const char *name, void *opaque,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
const int nbits = numa_num_possible_cpus();
|
|
|
|
ThreadContext *tc = THREAD_CONTEXT(obj);
|
|
|
|
uint16List *l, *host_nodes = NULL;
|
|
|
|
unsigned long *bitmap = NULL;
|
|
|
|
struct bitmask *tmp_cpus;
|
|
|
|
int ret, i;
|
|
|
|
|
|
|
|
if (tc->init_cpu_bitmap) {
|
|
|
|
error_setg(errp, "Mixing CPU and node affinity not supported");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2022-11-21 09:50:53 +01:00
|
|
|
if (!visit_type_uint16List(v, name, &host_nodes, errp)) {
|
util: Add write-only "node-affinity" property for ThreadContext
Let's make it easier to pin threads created via a ThreadContext to
all host CPUs currently belonging to a given set of host NUMA nodes --
which is the common case.
"node-affinity" is simply a shortcut for setting "cpu-affinity" manually
to the list of host CPUs belonging to the set of host nodes. This property
can only be written.
A simple QEMU example to set the CPU affinity to host node 1 on a system
with two nodes, 24 CPUs each, whereby odd-numbered host CPUs belong to
host node 1:
qemu-system-x86_64 -S \
-object thread-context,id=tc1,node-affinity=1
And we can query the cpu-affinity via HMP/QMP:
(qemu) qom-get tc1 cpu-affinity
[
1,
3,
5,
7,
9,
11,
13,
15,
17,
19,
21,
23,
25,
27,
29,
31,
33,
35,
37,
39,
41,
43,
45,
47
]
We cannot query the node-affinity:
(qemu) qom-get tc1 node-affinity
Error: Insufficient permission to perform this operation
But note that due to dynamic library loading this example will not work
before we actually make use of thread_context_create_thread() in QEMU
code, because the type will otherwise not get registered. We'll wire
this up next to make it work.
Note that if the host CPUs for a host node change due do CPU hot(un)plug
CPU onlining/offlining (i.e., lscpu output changes) after the ThreadContext
was started, the CPU affinity will not get updated.
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20221014134720.168738-5-david@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2022-10-14 15:47:17 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!host_nodes) {
|
|
|
|
error_setg(errp, "Node list is empty");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
bitmap = bitmap_new(nbits);
|
|
|
|
tmp_cpus = numa_allocate_cpumask();
|
|
|
|
for (l = host_nodes; l; l = l->next) {
|
|
|
|
numa_bitmask_clearall(tmp_cpus);
|
|
|
|
ret = numa_node_to_cpus(l->value, tmp_cpus);
|
|
|
|
if (ret) {
|
|
|
|
/* We ignore any errors, such as impossible nodes. */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
for (i = 0; i < nbits; i++) {
|
|
|
|
if (numa_bitmask_isbitset(tmp_cpus, i)) {
|
|
|
|
set_bit(i, bitmap);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
numa_free_cpumask(tmp_cpus);
|
|
|
|
|
|
|
|
if (bitmap_empty(bitmap, nbits)) {
|
|
|
|
error_setg(errp, "The nodes select no CPUs");
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tc->thread_id != -1) {
|
|
|
|
/*
|
|
|
|
* Note: we won't be adjusting the affinity of any thread that is still
|
|
|
|
* around for now, but only the affinity of the context thread.
|
|
|
|
*/
|
|
|
|
ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
|
|
|
|
if (ret) {
|
|
|
|
error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
tc->init_cpu_bitmap = bitmap;
|
|
|
|
bitmap = NULL;
|
|
|
|
tc->init_cpu_nbits = nbits;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
g_free(bitmap);
|
|
|
|
qapi_free_uint16List(host_nodes);
|
|
|
|
#else
|
|
|
|
error_setg(errp, "NUMA node affinity is not supported by this QEMU");
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
util: Introduce ThreadContext user-creatable object
Setting the CPU affinity of QEMU threads is a bit problematic, because
QEMU doesn't always have permissions to set the CPU affinity itself,
for example, with seccomp after initialized by QEMU:
-sandbox enable=on,resourcecontrol=deny
General information about CPU affinities can be found in the man page of
taskset:
CPU affinity is a scheduler property that "bonds" a process to a given
set of CPUs on the system. The Linux scheduler will honor the given CPU
affinity and the process will not run on any other CPUs.
While upper layers are already aware of how to handle CPU affinities for
long-lived threads like iothreads or vcpu threads, especially short-lived
threads, as used for memory-backend preallocation, are more involved to
handle. These threads are created on demand and upper layers are not even
able to identify and configure them.
Introduce the concept of a ThreadContext, that is essentially a thread
used for creating new threads. All threads created via that context
thread inherit the configured CPU affinity. Consequently, it's
sufficient to create a ThreadContext and configure it once, and have all
threads created via that ThreadContext inherit the same CPU affinity.
The CPU affinity of a ThreadContext can be configured two ways:
(1) Obtaining the thread id via the "thread-id" property and setting the
CPU affinity manually (e.g., via taskset).
(2) Setting the "cpu-affinity" property and letting QEMU try to set the
CPU affinity itself. This will fail if QEMU doesn't have permissions
to do so anymore after seccomp was initialized.
A simple QEMU example to set the CPU affinity to host CPU 0,1,6,7 would be:
qemu-system-x86_64 -S \
-object thread-context,id=tc1,cpu-affinity=0-1,cpu-affinity=6-7
And we can query it via HMP/QMP:
(qemu) qom-get tc1 cpu-affinity
[
0,
1,
6,
7
]
But note that due to dynamic library loading this example will not work
before we actually make use of thread_context_create_thread() in QEMU
code, because the type will otherwise not get registered. We'll wire
this up next to make it work.
In general, the interface behaves like pthread_setaffinity_np(): host
CPU numbers that are currently not available are ignored; only host CPU
numbers that are impossible with the current kernel will fail. If the
list of host CPU numbers does not include a single CPU that is
available, setting the CPU affinity will fail.
A ThreadContext can be reused, simply by reconfiguring the CPU affinity.
Note that the CPU affinity of previously created threads will not get
adjusted.
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20221014134720.168738-4-david@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
2022-10-14 15:47:16 +02:00
|
|
|
static void thread_context_get_thread_id(Object *obj, Visitor *v,
|
|
|
|
const char *name, void *opaque,
|
|
|
|
Error **errp)
|
|
|
|
{
|
|
|
|
ThreadContext *tc = THREAD_CONTEXT(obj);
|
|
|
|
uint64_t value = tc->thread_id;
|
|
|
|
|
|
|
|
visit_type_uint64(v, name, &value, errp);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * UserCreatable "complete" handler.
 *
 * Creates the joinable context thread, named "TC <id>" after the canonical
 * path component of the object, and waits until the thread has published its
 * thread id. If a CPU bitmap was stored before the thread existed, it is
 * applied now and released afterwards; a failure to apply it is reported via
 * @errp.
 */
static void thread_context_instance_complete(UserCreatable *uc, Error **errp)
{
    ThreadContext *ctx = THREAD_CONTEXT(uc);
    const char *id = object_get_canonical_path_component(OBJECT(uc));
    char *label = g_strdup_printf("TC %s", id);
    int rc;

    qemu_thread_create(&ctx->thread, label, thread_context_run, ctx,
                       QEMU_THREAD_JOINABLE);
    g_free(label);

    /* Block until the new thread has finished its initialization. */
    while (ctx->thread_id == -1) {
        qemu_sem_wait(&ctx->sem);
    }

    if (!ctx->init_cpu_bitmap) {
        return;
    }

    /* Apply the affinity that was configured before the thread existed. */
    rc = qemu_thread_set_affinity(&ctx->thread, ctx->init_cpu_bitmap,
                                  ctx->init_cpu_nbits);
    if (rc) {
        error_setg(errp, "Setting CPU affinity failed: %s", strerror(rc));
    }
    g_free(ctx->init_cpu_bitmap);
    ctx->init_cpu_bitmap = NULL;
}
|
|
|
|
|
|
|
|
/*
 * Class initializer: hooks up the "complete" handler and registers the
 * "thread-id", "cpu-affinity" and "node-affinity" class properties.
 */
static void thread_context_class_init(ObjectClass *oc, void *data)
{
    UserCreatableClass *creatable = USER_CREATABLE_CLASS(oc);

    creatable->complete = thread_context_instance_complete;

    /* Getter only. */
    object_class_property_add(oc, "thread-id", "int",
                              thread_context_get_thread_id,
                              NULL,
                              NULL, NULL);

    /* Getter and setter. */
    object_class_property_add(oc, "cpu-affinity", "int",
                              thread_context_get_cpu_affinity,
                              thread_context_set_cpu_affinity,
                              NULL, NULL);

    /* Setter only. */
    object_class_property_add(oc, "node-affinity", "int",
                              NULL,
                              thread_context_set_node_affinity,
                              NULL, NULL);
}
|
|
|
|
|
|
|
|
/*
 * Instance initializer: marks the context thread as not yet created
 * (thread_id == -1) and sets up both semaphores (initial count 0) and the
 * mutex.
 */
static void thread_context_instance_init(Object *obj)
{
    ThreadContext *ctx = THREAD_CONTEXT(obj);

    /* -1 means "no context thread yet". */
    ctx->thread_id = -1;

    qemu_sem_init(&ctx->sem, 0);
    qemu_sem_init(&ctx->sem_thread, 0);
    qemu_mutex_init(&ctx->mutex);
}
|
|
|
|
|
|
|
|
/*
 * Instance finalizer: if the context thread was created, asks it to stop and
 * joins it; afterwards destroys the semaphores and the mutex.
 */
static void thread_context_instance_finalize(Object *obj)
{
    ThreadContext *ctx = THREAD_CONTEXT(obj);
    const bool thread_started = ctx->thread_id != -1;

    if (thread_started) {
        /* Post the stop command, wake up the thread and wait for its exit. */
        ctx->thread_cmd = TC_CMD_STOP;
        qemu_sem_post(&ctx->sem_thread);
        qemu_thread_join(&ctx->thread);
    }

    qemu_sem_destroy(&ctx->sem);
    qemu_sem_destroy(&ctx->sem_thread);
    qemu_mutex_destroy(&ctx->mutex);
}
|
|
|
|
|
|
|
|
static const TypeInfo thread_context_info = {
|
|
|
|
.name = TYPE_THREAD_CONTEXT,
|
|
|
|
.parent = TYPE_OBJECT,
|
|
|
|
.class_init = thread_context_class_init,
|
|
|
|
.instance_size = sizeof(ThreadContext),
|
|
|
|
.instance_init = thread_context_instance_init,
|
|
|
|
.instance_finalize = thread_context_instance_finalize,
|
|
|
|
.interfaces = (InterfaceInfo[]) {
|
|
|
|
{ TYPE_USER_CREATABLE },
|
|
|
|
{ }
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
static void thread_context_register_types(void)
|
|
|
|
{
|
|
|
|
type_register_static(&thread_context_info);
|
|
|
|
}
|
|
|
|
type_init(thread_context_register_types)
|
|
|
|
|
|
|
|
/*
 * Create a new thread via the context thread of @tc.
 *
 * @thread, @name, @start_routine, @arg and @mode are handed to the context
 * thread, which passes them on to qemu_thread_create(). The function holds
 * tc->mutex while the request is in flight and returns only after the
 * context thread has reset tc->thread_cmd to TC_CMD_NONE.
 */
void thread_context_create_thread(ThreadContext *tc, QemuThread *thread,
                                  const char *name,
                                  void *(*start_routine)(void *), void *arg,
                                  int mode)
{
    /* Lives on our stack; valid because we wait for completion below. */
    ThreadContextCmdNew request = {
        .thread = thread,
        .name = name,
        .start_routine = start_routine,
        .arg = arg,
        .mode = mode,
    };

    qemu_mutex_lock(&tc->mutex);

    /* Post the request and wake up the context thread. */
    tc->thread_cmd = TC_CMD_NEW;
    tc->thread_cmd_data = &request;
    qemu_sem_post(&tc->sem_thread);

    /* Wait until the context thread marked the request as processed. */
    while (tc->thread_cmd != TC_CMD_NONE) {
        qemu_sem_wait(&tc->sem);
    }

    qemu_mutex_unlock(&tc->mutex);
}
|