10218ae6d0
Let's make it easier to pin threads created via a ThreadContext to all host CPUs currently belonging to a given set of host NUMA nodes -- which is the common case. "node-affinity" is simply a shortcut for setting "cpu-affinity" manually to the list of host CPUs belonging to the set of host nodes. This property can only be written. A simple QEMU example to set the CPU affinity to host node 1 on a system with two nodes, 24 CPUs each, whereby odd-numbered host CPUs belong to host node 1: qemu-system-x86_64 -S \ -object thread-context,id=tc1,node-affinity=1 And we can query the cpu-affinity via HMP/QMP: (qemu) qom-get tc1 cpu-affinity [ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47 ] We cannot query the node-affinity: (qemu) qom-get tc1 node-affinity Error: Insufficient permission to perform this operation But note that due to dynamic library loading this example will not work before we actually make use of thread_context_create_thread() in QEMU code, because the type will otherwise not get registered. We'll wire this up next to make it work. Note that if the host CPUs for a host node change due to CPU hot(un)plug or CPU onlining/offlining (i.e., lscpu output changes) after the ThreadContext was started, the CPU affinity will not get updated. Reviewed-by: Michal Privoznik <mprivozn@redhat.com> Acked-by: Markus Armbruster <armbru@redhat.com> Message-Id: <20221014134720.168738-5-david@redhat.com> Signed-off-by: David Hildenbrand <david@redhat.com>
363 lines
10 KiB
C
363 lines
10 KiB
C
/*
|
|
* QEMU Thread Context
|
|
*
|
|
* Copyright Red Hat Inc., 2022
|
|
*
|
|
* Authors:
|
|
* David Hildenbrand <david@redhat.com>
|
|
*
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
* See the COPYING file in the top-level directory.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qemu/thread-context.h"
|
|
#include "qapi/error.h"
|
|
#include "qapi/qapi-builtin-visit.h"
|
|
#include "qapi/visitor.h"
|
|
#include "qemu/config-file.h"
|
|
#include "qapi/qapi-builtin-visit.h"
|
|
#include "qom/object_interfaces.h"
|
|
#include "qemu/module.h"
|
|
#include "qemu/bitmap.h"
|
|
|
|
#ifdef CONFIG_NUMA
|
|
#include <numa.h>
|
|
#endif
|
|
|
|
/* Commands passed to the context thread through tc->thread_cmd. */
enum {
    TC_CMD_NONE = 0, /* nothing pending */
    TC_CMD_STOP = 1, /* context thread acknowledges and returns */
    TC_CMD_NEW = 2,  /* context thread creates a thread from thread_cmd_data */
};
|
|
|
|
typedef struct ThreadContextCmdNew {
|
|
QemuThread *thread;
|
|
const char *name;
|
|
void *(*start_routine)(void *);
|
|
void *arg;
|
|
int mode;
|
|
} ThreadContextCmdNew;
|
|
|
|
static void *thread_context_run(void *opaque)
|
|
{
|
|
ThreadContext *tc = opaque;
|
|
|
|
tc->thread_id = qemu_get_thread_id();
|
|
qemu_sem_post(&tc->sem);
|
|
|
|
while (true) {
|
|
/*
|
|
* Threads inherit the CPU affinity of the creating thread. For this
|
|
* reason, we create new (especially short-lived) threads from our
|
|
* persistent context thread.
|
|
*
|
|
* Especially when QEMU is not allowed to set the affinity itself,
|
|
* management tools can simply set the affinity of the context thread
|
|
* after creating the context, to have new threads created via
|
|
* the context inherit the CPU affinity automatically.
|
|
*/
|
|
switch (tc->thread_cmd) {
|
|
case TC_CMD_NONE:
|
|
break;
|
|
case TC_CMD_STOP:
|
|
tc->thread_cmd = TC_CMD_NONE;
|
|
qemu_sem_post(&tc->sem);
|
|
return NULL;
|
|
case TC_CMD_NEW: {
|
|
ThreadContextCmdNew *cmd_new = tc->thread_cmd_data;
|
|
|
|
qemu_thread_create(cmd_new->thread, cmd_new->name,
|
|
cmd_new->start_routine, cmd_new->arg,
|
|
cmd_new->mode);
|
|
tc->thread_cmd = TC_CMD_NONE;
|
|
tc->thread_cmd_data = NULL;
|
|
qemu_sem_post(&tc->sem);
|
|
break;
|
|
}
|
|
default:
|
|
g_assert_not_reached();
|
|
}
|
|
qemu_sem_wait(&tc->sem_thread);
|
|
}
|
|
}
|
|
|
|
static void thread_context_set_cpu_affinity(Object *obj, Visitor *v,
|
|
const char *name, void *opaque,
|
|
Error **errp)
|
|
{
|
|
ThreadContext *tc = THREAD_CONTEXT(obj);
|
|
uint16List *l, *host_cpus = NULL;
|
|
unsigned long *bitmap = NULL;
|
|
int nbits = 0, ret;
|
|
Error *err = NULL;
|
|
|
|
if (tc->init_cpu_bitmap) {
|
|
error_setg(errp, "Mixing CPU and node affinity not supported");
|
|
return;
|
|
}
|
|
|
|
visit_type_uint16List(v, name, &host_cpus, &err);
|
|
if (err) {
|
|
error_propagate(errp, err);
|
|
return;
|
|
}
|
|
|
|
if (!host_cpus) {
|
|
error_setg(errp, "CPU list is empty");
|
|
goto out;
|
|
}
|
|
|
|
for (l = host_cpus; l; l = l->next) {
|
|
nbits = MAX(nbits, l->value + 1);
|
|
}
|
|
bitmap = bitmap_new(nbits);
|
|
for (l = host_cpus; l; l = l->next) {
|
|
set_bit(l->value, bitmap);
|
|
}
|
|
|
|
if (tc->thread_id != -1) {
|
|
/*
|
|
* Note: we won't be adjusting the affinity of any thread that is still
|
|
* around, but only the affinity of the context thread.
|
|
*/
|
|
ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
|
|
if (ret) {
|
|
error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
|
|
}
|
|
} else {
|
|
tc->init_cpu_bitmap = bitmap;
|
|
bitmap = NULL;
|
|
tc->init_cpu_nbits = nbits;
|
|
}
|
|
out:
|
|
g_free(bitmap);
|
|
qapi_free_uint16List(host_cpus);
|
|
}
|
|
|
|
static void thread_context_get_cpu_affinity(Object *obj, Visitor *v,
|
|
const char *name, void *opaque,
|
|
Error **errp)
|
|
{
|
|
unsigned long *bitmap, nbits, value;
|
|
ThreadContext *tc = THREAD_CONTEXT(obj);
|
|
uint16List *host_cpus = NULL;
|
|
uint16List **tail = &host_cpus;
|
|
int ret;
|
|
|
|
if (tc->thread_id == -1) {
|
|
error_setg(errp, "Object not initialized yet");
|
|
return;
|
|
}
|
|
|
|
ret = qemu_thread_get_affinity(&tc->thread, &bitmap, &nbits);
|
|
if (ret) {
|
|
error_setg(errp, "Getting CPU affinity failed: %s", strerror(ret));
|
|
return;
|
|
}
|
|
|
|
value = find_first_bit(bitmap, nbits);
|
|
while (value < nbits) {
|
|
QAPI_LIST_APPEND(tail, value);
|
|
|
|
value = find_next_bit(bitmap, nbits, value + 1);
|
|
}
|
|
g_free(bitmap);
|
|
|
|
visit_type_uint16List(v, name, &host_cpus, errp);
|
|
qapi_free_uint16List(host_cpus);
|
|
}
|
|
|
|
static void thread_context_set_node_affinity(Object *obj, Visitor *v,
|
|
const char *name, void *opaque,
|
|
Error **errp)
|
|
{
|
|
#ifdef CONFIG_NUMA
|
|
const int nbits = numa_num_possible_cpus();
|
|
ThreadContext *tc = THREAD_CONTEXT(obj);
|
|
uint16List *l, *host_nodes = NULL;
|
|
unsigned long *bitmap = NULL;
|
|
struct bitmask *tmp_cpus;
|
|
Error *err = NULL;
|
|
int ret, i;
|
|
|
|
if (tc->init_cpu_bitmap) {
|
|
error_setg(errp, "Mixing CPU and node affinity not supported");
|
|
return;
|
|
}
|
|
|
|
visit_type_uint16List(v, name, &host_nodes, &err);
|
|
if (err) {
|
|
error_propagate(errp, err);
|
|
return;
|
|
}
|
|
|
|
if (!host_nodes) {
|
|
error_setg(errp, "Node list is empty");
|
|
goto out;
|
|
}
|
|
|
|
bitmap = bitmap_new(nbits);
|
|
tmp_cpus = numa_allocate_cpumask();
|
|
for (l = host_nodes; l; l = l->next) {
|
|
numa_bitmask_clearall(tmp_cpus);
|
|
ret = numa_node_to_cpus(l->value, tmp_cpus);
|
|
if (ret) {
|
|
/* We ignore any errors, such as impossible nodes. */
|
|
continue;
|
|
}
|
|
for (i = 0; i < nbits; i++) {
|
|
if (numa_bitmask_isbitset(tmp_cpus, i)) {
|
|
set_bit(i, bitmap);
|
|
}
|
|
}
|
|
}
|
|
numa_free_cpumask(tmp_cpus);
|
|
|
|
if (bitmap_empty(bitmap, nbits)) {
|
|
error_setg(errp, "The nodes select no CPUs");
|
|
goto out;
|
|
}
|
|
|
|
if (tc->thread_id != -1) {
|
|
/*
|
|
* Note: we won't be adjusting the affinity of any thread that is still
|
|
* around for now, but only the affinity of the context thread.
|
|
*/
|
|
ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits);
|
|
if (ret) {
|
|
error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret));
|
|
}
|
|
} else {
|
|
tc->init_cpu_bitmap = bitmap;
|
|
bitmap = NULL;
|
|
tc->init_cpu_nbits = nbits;
|
|
}
|
|
out:
|
|
g_free(bitmap);
|
|
qapi_free_uint16List(host_nodes);
|
|
#else
|
|
error_setg(errp, "NUMA node affinity is not supported by this QEMU");
|
|
#endif
|
|
}
|
|
|
|
static void thread_context_get_thread_id(Object *obj, Visitor *v,
|
|
const char *name, void *opaque,
|
|
Error **errp)
|
|
{
|
|
ThreadContext *tc = THREAD_CONTEXT(obj);
|
|
uint64_t value = tc->thread_id;
|
|
|
|
visit_type_uint64(v, name, &value, errp);
|
|
}
|
|
|
|
/*
 * UserCreatable "complete" hook.
 *
 * Starts the joinable context thread, waits until it has published its
 * thread ID, and then applies an affinity that was configured before the
 * thread existed (tc->init_cpu_bitmap), if any.
 */
static void thread_context_instance_complete(UserCreatable *uc, Error **errp)
{
    ThreadContext *ctx = THREAD_CONTEXT(uc);
    char *name;
    int rc;

    name = g_strdup_printf("TC %s",
                           object_get_canonical_path_component(OBJECT(uc)));
    qemu_thread_create(&ctx->thread, name, thread_context_run, ctx,
                       QEMU_THREAD_JOINABLE);
    g_free(name);

    /* Wait until initialization of the thread is done. */
    while (ctx->thread_id == -1) {
        qemu_sem_wait(&ctx->sem);
    }

    if (!ctx->init_cpu_bitmap) {
        /* No affinity was requested up front. */
        return;
    }

    rc = qemu_thread_set_affinity(&ctx->thread, ctx->init_cpu_bitmap,
                                  ctx->init_cpu_nbits);
    if (rc) {
        error_setg(errp, "Setting CPU affinity failed: %s", strerror(rc));
    }
    /* The pending bitmap is consumed, whether or not applying it worked. */
    g_free(ctx->init_cpu_bitmap);
    ctx->init_cpu_bitmap = NULL;
}
|
|
|
|
/*
 * Class initializer: installs the "complete" hook and registers the
 * "thread-id" (read-only), "cpu-affinity" (read/write) and "node-affinity"
 * (write-only) class properties.
 */
static void thread_context_class_init(ObjectClass *oc, void *data)
{
    UserCreatableClass *creatable = USER_CREATABLE_CLASS(oc);

    creatable->complete = thread_context_instance_complete;

    /* Getter only. */
    object_class_property_add(oc, "thread-id", "int",
                              thread_context_get_thread_id, NULL,
                              NULL, NULL);
    /* Getter and setter. */
    object_class_property_add(oc, "cpu-affinity", "int",
                              thread_context_get_cpu_affinity,
                              thread_context_set_cpu_affinity,
                              NULL, NULL);
    /* Setter only. */
    object_class_property_add(oc, "node-affinity", "int",
                              NULL, thread_context_set_node_affinity,
                              NULL, NULL);
}
|
|
|
|
/*
 * Instance initializer: marks the context thread as not yet created
 * (thread_id == -1) and sets up both semaphores and the mutex.
 */
static void thread_context_instance_init(Object *obj)
{
    ThreadContext *ctx = THREAD_CONTEXT(obj);

    /* -1 means "no context thread yet"; see the checks on thread_id. */
    ctx->thread_id = -1;

    qemu_sem_init(&ctx->sem, 0);
    qemu_sem_init(&ctx->sem_thread, 0);
    qemu_mutex_init(&ctx->mutex);
}
|
|
|
|
/*
 * Instance finalizer.
 *
 * Stops and joins the context thread if it was ever started, releases an
 * initial CPU bitmap that was never consumed, and destroys the semaphores
 * and the mutex.
 */
static void thread_context_instance_finalize(Object *obj)
{
    ThreadContext *tc = THREAD_CONTEXT(obj);

    if (tc->thread_id != -1) {
        /* Ask the context thread to exit and wait for it. */
        tc->thread_cmd = TC_CMD_STOP;
        qemu_sem_post(&tc->sem_thread);
        qemu_thread_join(&tc->thread);
    }

    /*
     * A bitmap stored by one of the affinity setters is only consumed by
     * thread_context_instance_complete(). If the object is destroyed before
     * that ran (e.g. because a later property setter failed), the bitmap
     * would otherwise leak. g_free(NULL) is a no-op.
     */
    g_free(tc->init_cpu_bitmap);
    tc->init_cpu_bitmap = NULL;

    qemu_sem_destroy(&tc->sem);
    qemu_sem_destroy(&tc->sem_thread);
    qemu_mutex_destroy(&tc->mutex);
}
|
|
|
|
static const TypeInfo thread_context_info = {
|
|
.name = TYPE_THREAD_CONTEXT,
|
|
.parent = TYPE_OBJECT,
|
|
.class_init = thread_context_class_init,
|
|
.instance_size = sizeof(ThreadContext),
|
|
.instance_init = thread_context_instance_init,
|
|
.instance_finalize = thread_context_instance_finalize,
|
|
.interfaces = (InterfaceInfo[]) {
|
|
{ TYPE_USER_CREATABLE },
|
|
{ }
|
|
}
|
|
};
|
|
|
|
static void thread_context_register_types(void)
|
|
{
|
|
type_register_static(&thread_context_info);
|
|
}
|
|
type_init(thread_context_register_types)
|
|
|
|
/*
 * Create a new thread from within the context thread of @tc.
 *
 * The request is posted to the context thread, which calls
 * qemu_thread_create() with @thread, @name, @start_routine, @arg and @mode.
 * This function returns only after the context thread has reset the command
 * to TC_CMD_NONE. tc->mutex is held for the whole exchange.
 */
void thread_context_create_thread(ThreadContext *tc, QemuThread *thread,
                                  const char *name,
                                  void *(*start_routine)(void *), void *arg,
                                  int mode)
{
    /*
     * The request lives on our stack; that is fine because we do not return
     * before the context thread has finished processing it.
     */
    ThreadContextCmdNew request;

    request.thread = thread;
    request.name = name;
    request.start_routine = start_routine;
    request.arg = arg;
    request.mode = mode;

    qemu_mutex_lock(&tc->mutex);

    /* Publish the request, then wake up the context thread. */
    tc->thread_cmd = TC_CMD_NEW;
    tc->thread_cmd_data = &request;
    qemu_sem_post(&tc->sem_thread);

    /* Block until the context thread has marked the command as handled. */
    while (tc->thread_cmd != TC_CMD_NONE) {
        qemu_sem_wait(&tc->sem);
    }

    qemu_mutex_unlock(&tc->mutex);
}
|