2007-11-17 18:14:51 +01:00
|
|
|
/* Declarations for use by board files for creating devices. */
|
|
|
|
|
|
|
|
#ifndef HW_BOARDS_H
|
|
|
|
#define HW_BOARDS_H
|
|
|
|
|
2019-08-12 07:23:46 +02:00
|
|
|
#include "exec/memory.h"
|
2020-02-19 17:08:36 +01:00
|
|
|
#include "sysemu/hostmem.h"
|
2012-12-17 18:20:04 +01:00
|
|
|
#include "sysemu/blockdev.h"
|
2019-06-19 22:10:41 +02:00
|
|
|
#include "qapi/qapi-types-machine.h"
|
2019-05-23 16:35:07 +02:00
|
|
|
#include "qemu/module.h"
|
2014-03-05 18:30:45 +01:00
|
|
|
#include "qom/object.h"
|
2019-07-09 17:20:52 +02:00
|
|
|
#include "hw/core/cpu.h"
|
2009-07-15 13:48:21 +02:00
|
|
|
|
2014-05-14 11:43:15 +02:00
|
|
|
/* Suffix appended to a machine name to form its QOM class name. */
#define TYPE_MACHINE_SUFFIX "-machine"

/* Machine class name that needs to be used for class-name-based machine
 * type lookup to work.
 */
#define MACHINE_TYPE_NAME(machinename) (machinename TYPE_MACHINE_SUFFIX)

#define TYPE_MACHINE "machine"
#undef MACHINE /* BSD defines it and QEMU does not use it */
/* Declares MachineState/MachineClass and the MACHINE() QOM cast macros. */
OBJECT_DECLARE_TYPE(MachineState, MachineClass, MACHINE)
|
2014-03-05 18:30:45 +01:00
|
|
|
|
2014-03-05 18:30:47 +01:00
|
|
|
/* The machine being emulated by this QEMU process; set once during startup. */
extern MachineState *current_machine;

/*
 * Machine helper prototypes; implementations live outside this header
 * (presumably hw/core/machine*.c — confirm when editing).
 */
void machine_add_audiodev_property(MachineClass *mc);
/* Perform the board-specific init of @machine; failure reported via @errp. */
void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp);
bool machine_usb(MachineState *machine);
int machine_phandle_start(MachineState *machine);
bool machine_dump_guest_core(MachineState *machine);
bool machine_mem_merge(MachineState *machine);
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
void machine_set_cpu_numa_node(MachineState *machine,
                               const CpuInstanceProperties *props,
                               Error **errp);
/* Parse a user-provided -smp configuration into @ms; errors via @errp. */
void machine_parse_smp_config(MachineState *ms,
                              const SMPConfiguration *config, Error **errp);
unsigned int machine_topo_get_cores_per_socket(const MachineState *ms);
unsigned int machine_topo_get_threads_per_socket(const MachineState *ms);
/* Set up the memory-device region of @ms at guest-physical @base, @size bytes. */
void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size);
|
2015-01-06 14:29:13 +01:00
|
|
|
|
2021-03-25 16:33:07 +01:00
|
|
|
/**
 * machine_class_allow_dynamic_sysbus_dev: Add type to list of valid devices
 * @mc: Machine class
 * @type: type to allow (should be a subtype of TYPE_SYS_BUS_DEVICE)
 *
 * Add the QOM type @type to the list of devices which are subtypes
 * of TYPE_SYS_BUS_DEVICE but which are still permitted to be dynamically
 * created (eg by the user on the command line with -device).
 * By default if the user tries to create any devices on the command line
 * that are subtypes of TYPE_SYS_BUS_DEVICE they will get an error message;
 * for the special cases which are permitted for this machine model, the
 * machine model class init code must call this function to add them
 * to the list of specifically permitted devices.
 */
void machine_class_allow_dynamic_sysbus_dev(MachineClass *mc, const char *type);

/**
 * device_type_is_dynamic_sysbus: Check if type is an allowed sysbus device
 * type for the machine class.
 * @mc: Machine class
 * @type: type to check (should be a subtype of TYPE_SYS_BUS_DEVICE)
 *
 * Returns: true if @type is a type in the machine's list of
 * dynamically pluggable sysbus devices; otherwise false.
 *
 * Check if the QOM type @type is in the list of allowed sysbus device
 * types (see machine_class_allow_dynamic_sysbus_dev()).
 * Note that if @type has a parent type in the list, it is allowed too.
 */
bool device_type_is_dynamic_sysbus(MachineClass *mc, const char *type);

/**
 * device_is_dynamic_sysbus: test whether device is a dynamic sysbus device
 * @mc: Machine class
 * @dev: device to check
 *
 * Returns: true if @dev is a sysbus device on the machine's list
 * of dynamically pluggable sysbus devices; otherwise false.
 *
 * This function checks whether @dev is a valid dynamic sysbus device,
 * by first confirming that it is a sysbus device and then checking it
 * against the list of permitted dynamic sysbus devices which has been
 * set up by the machine using machine_class_allow_dynamic_sysbus_dev().
 *
 * It is valid to call this with something that is not a subclass of
 * TYPE_SYS_BUS_DEVICE; the function will return false in this case.
 * This allows hotplug callback functions to be written as:
 *     if (device_is_dynamic_sysbus(mc, dev)) {
 *         handle dynamic sysbus case;
 *     } else if (some other kind of hotplug) {
 *         handle that;
 *     }
 */
bool device_is_dynamic_sysbus(MachineClass *mc, DeviceState *dev);

/*
 * Checks that backend isn't used, preps it for exclusive usage and
 * returns migratable MemoryRegion provided by backend.
 */
MemoryRegion *machine_consume_memdev(MachineState *machine,
                                     HostMemoryBackend *backend);
|
2017-11-25 16:16:05 +01:00
|
|
|
|
2016-03-03 15:28:56 +01:00
|
|
|
/**
 * CPUArchId:
 * @arch_id - architecture-dependent CPU ID of present or possible CPU
 * @cpu - pointer to corresponding CPU object if it's present, or NULL otherwise
 * @type - QOM class name of possible @cpu object
 * @props - CPU object properties, initialized by board
 * @vcpus_count - number of threads provided by @cpu object
 */
typedef struct CPUArchId {
    uint64_t arch_id;
    int64_t vcpus_count;
    CpuInstanceProperties props;
    Object *cpu;
    const char *type;
} CPUArchId;
|
|
|
|
|
|
|
|
/**
 * CPUArchIdList:
 * @len - number of @CPUArchId items in @cpus array
 * @cpus - array of present or possible CPUs for current machine configuration
 */
typedef struct {
    int len;
    CPUArchId cpus[]; /* flexible array member, allocated with the list */
} CPUArchIdList;
|
|
|
|
|
2021-09-29 04:58:12 +02:00
|
|
|
/**
 * SMPCompatProps:
 * @prefer_sockets - whether sockets are preferred over cores in smp parsing
 * @dies_supported - whether dies are supported by the machine
 * @clusters_supported - whether clusters are supported by the machine
 * @has_clusters - whether clusters are explicitly specified in the user
 *                 provided SMP configuration
 */
typedef struct {
    bool prefer_sockets;
    bool dies_supported;
    bool clusters_supported;
    bool has_clusters;
} SMPCompatProps;
|
|
|
|
|
2014-03-05 18:30:45 +01:00
|
|
|
/**
 * MachineClass:
 * @deprecation_reason: If set, the machine is marked as deprecated. The
 *    string should provide some clear information about what to use instead.
 * @max_cpus: maximum number of CPUs supported. Default: 1
 * @min_cpus: minimum number of CPUs supported. Default: 1
 * @default_cpus: number of CPUs instantiated if none are specified. Default: 1
 * @is_default:
 *    If true QEMU will use this machine by default if no '-M' option is given.
 * @get_hotplug_handler: this function is called during bus-less
 *    device hotplug. If defined it returns pointer to an instance
 *    of HotplugHandler object, which handles hotplug operation
 *    for a given @dev. It may return NULL if @dev doesn't require
 *    any actions to be performed by hotplug handler.
 * @cpu_index_to_instance_props:
 *    used to provide @cpu_index to socket/core/thread number mapping, allowing
 *    legacy code to perform mapping from cpu_index to topology properties
 *    Returns: tuple of socket/core/thread ids given cpu_index belongs to.
 * @hw_version:
 *    Value of QEMU_VERSION when the machine was added to QEMU.
 *    Set only by old machines because they need to keep
 *    compatibility on code that exposed QEMU_VERSION to guests in
 *    the past (and now use qemu_hw_version()).
 * @possible_cpu_arch_ids:
 *    Returns an array of @CPUArchId architecture-dependent CPU IDs
 *    which includes CPU IDs for present and possible to hotplug CPUs.
 *    Caller is responsible for freeing returned list.
 * @get_default_cpu_node_id:
 *    returns default board specific node_id value for CPU slot specified by
 *    index @idx in @ms->possible_cpus[]
 * @has_hotpluggable_cpus:
 *    If true, board supports CPUs creation with -device/device_add.
 * @default_cpu_type:
 *    specifies default CPU_TYPE, which will be used for parsing target
 *    specific features and for creating CPUs if CPU name wasn't provided
 *    explicitly at CLI
 * @minimum_page_bits:
 *    If non-zero, the board promises never to create a CPU with a page size
 *    smaller than this, so QEMU can use a more efficient larger page
 *    size than the target architecture's minimum. (Attempting to create
 *    such a CPU will fail.) Note that changing this is a migration
 *    compatibility break for the machine.
 * @ignore_memory_transaction_failures:
 *    If this flag is true then the CPU will ignore memory transaction
 *    failures which should cause the CPU to take an exception due to an
 *    access to an unassigned physical address; the transaction will instead
 *    return zero (for a read) or be ignored (for a write). This should be
 *    set only by legacy board models which rely on the old RAZ/WI behaviour
 *    for handling devices that QEMU does not yet model. New board models
 *    should instead use "unimplemented-device" for all memory ranges where
 *    the guest will attempt to probe for a device that QEMU doesn't
 *    implement and a stub device is required.
 * @kvm_type:
 *    Return the type of KVM corresponding to the kvm-type string option or
 *    computed based on other criteria such as the host kernel capabilities.
 *    kvm-type may be NULL if it is not needed.
 * @numa_mem_supported:
 *    true if '--numa node.mem' option is supported and false otherwise
 * @hotplug_allowed:
 *    If the hook is provided, then it'll be called for each device
 *    hotplug to check whether the device hotplug is allowed. Return
 *    true to grant allowance or false to reject the hotplug. When
 *    false is returned, an error must be set to show the reason of
 *    the rejection. If the hook is not provided, all hotplug will be
 *    allowed.
 * @default_ram_id:
 *    Specifies initial RAM MemoryRegion name to be used for default backend
 *    creation if user explicitly hasn't specified backend with "memory-backend"
 *    property.
 *    It also will be used as a way to opt into "-m" option support.
 *    If it's not set by board, '-m' will be ignored and generic code will
 *    not create default RAM MemoryRegion.
 * @fixup_ram_size:
 *    Amends user provided ram size (with -m option) using machine
 *    specific algorithm. To be used by old machine types for compat
 *    purposes only.
 *    Applies only to default memory backend, i.e., explicit memory backend
 *    wasn't used.
 */
struct MachineClass {
    /*< private >*/
    ObjectClass parent_class;
    /*< public >*/

    const char *family; /* NULL iff @name identifies a standalone machtype */
    char *name;
    const char *alias;
    const char *desc;
    const char *deprecation_reason;

    void (*init)(MachineState *state);
    void (*reset)(MachineState *state, ShutdownCause reason);
    void (*wakeup)(MachineState *state);
    int (*kvm_type)(MachineState *machine, const char *arg);

    BlockInterfaceType block_default_type;
    int units_per_default_bus;
    int max_cpus;
    int min_cpus;
    int default_cpus;
    unsigned int no_serial:1,
        no_parallel:1,
        no_floppy:1,
        no_cdrom:1,
        no_sdcard:1,
        pci_allow_0_address:1,
        legacy_fw_cfg_order:1;
    bool is_default;
    const char *default_machine_opts;
    const char *default_boot_order;
    const char *default_display;
    const char *default_nic;
    GPtrArray *compat_props;
    const char *hw_version;
    ram_addr_t default_ram_size;
    const char *default_cpu_type;
    bool default_kernel_irqchip_split;
    bool option_rom_has_mr;
    bool rom_file_has_mr;
    int minimum_page_bits;
    bool has_hotpluggable_cpus;
    bool ignore_memory_transaction_failures;
    int numa_mem_align_shift;
    const char **valid_cpu_types;
    strList *allowed_dynamic_sysbus_devices;
    bool auto_enable_numa_with_memhp;
    bool auto_enable_numa_with_memdev;
    bool ignore_boot_device_suffixes;
    bool smbus_no_migration_support;
    bool nvdimm_supported;
    bool numa_mem_supported;
    bool auto_enable_numa;
    bool cpu_cluster_has_numa_boundary;
    SMPCompatProps smp_props;
    const char *default_ram_id;

    HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                           DeviceState *dev);
    bool (*hotplug_allowed)(MachineState *state, DeviceState *dev,
                            Error **errp);
    CpuInstanceProperties (*cpu_index_to_instance_props)(MachineState *machine,
                                                         unsigned cpu_index);
    const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
    int64_t (*get_default_cpu_node_id)(const MachineState *ms, int idx);
    ram_addr_t (*fixup_ram_size)(ram_addr_t size);
};
|
|
|
|
|
2018-04-23 18:51:17 +02:00
|
|
|
/**
 * DeviceMemoryState:
 *
 * State of the memory-device ("device memory") region of a machine:
 * the dedicated guest-physical window into which memory devices
 * (e.g. DIMMs) are mapped, plus memslot bookkeeping for that window.
 *
 * @base: address in guest physical address space where the memory
 *        address space for memory devices starts
 * @mr: memory region container for memory devices
 * @as: address space for memory devices
 * @listener: memory listener used to track used memslots in the address space
 * @dimm_size: the sum of plugged DIMMs' sizes
 * @used_region_size: the part of @mr already used by memory devices
 * @required_memslots: the number of memslots required by memory devices
 * @used_memslots: the number of memslots currently used by memory devices
 * @memslot_auto_decision_active: whether any plugged memory device
 *                                automatically decided to use more than
 *                                one memslot
 */
typedef struct DeviceMemoryState {
    hwaddr base;
    MemoryRegion mr;
    AddressSpace as;
    MemoryListener listener;
    uint64_t dimm_size;
    uint64_t used_region_size;
    unsigned int required_memslots;
    unsigned int used_memslots;
    /* counts devices, not memslots -- see @memslot_auto_decision_active doc */
    unsigned int memslot_auto_decision_active;
} DeviceMemoryState;
|
2018-04-23 18:51:17 +02:00
|
|
|
|
2019-05-18 22:54:19 +02:00
|
|
|
/**
 * CpuTopology:
 *
 * Describes the vCPU topology of a machine, from sockets down to
 * threads (socket > die > cluster > core > thread).
 *
 * @cpus: the number of present logical processors on the machine
 * @sockets: the number of sockets on the machine
 * @dies: the number of dies in one socket
 * @clusters: the number of clusters in one die
 * @cores: the number of cores in one cluster
 * @threads: the number of threads in one core
 * @max_cpus: the maximum number of logical processors on the machine
 */
typedef struct CpuTopology {
    unsigned int cpus;
    unsigned int sockets;
    unsigned int dies;
    unsigned int clusters;
    unsigned int cores;
    unsigned int threads;
    unsigned int max_cpus;
} CpuTopology;
|
|
|
|
|
2014-03-05 18:30:45 +01:00
|
|
|
/**
 * MachineState:
 *
 * Per-instance state of a machine (board), instantiated from a
 * MachineClass.  Holds the option values configurable on the command
 * line (-machine, -smp, -m, -kernel, ...) alongside the runtime
 * objects (RAM region, device memory, accelerator, NUMA state) that
 * board init code builds from them.
 */
struct MachineState {
    /*< private >*/
    Object parent_obj;

    /*< public >*/

    /* flattened device tree, when the board builds/loads one */
    void *fdt;
    char *dtb;          /* path of a DTB file to load */
    char *dumpdtb;      /* path to dump the (final) DTB to */
    int phandle_start;  /* see machine_phandle_start() */
    char *dt_compatible;
    bool dump_guest_core;   /* see machine_dump_guest_core() */
    bool mem_merge;         /* see machine_mem_merge() */
    bool usb;               /* see machine_usb() */
    bool usb_disabled;
    char *firmware;     /* firmware image name/path to load */
    bool iommu;
    bool suppress_vmdesc;
    bool enable_graphics;
    /* confidential-guest (e.g. encrypted memory) support object, if any */
    ConfidentialGuestSupport *cgs;
    HostMemoryBackend *memdev;
    /*
     * convenience alias to ram_memdev_id backend memory region
     * or to numa container memory region
     */
    MemoryRegion *ram;
    DeviceMemoryState *device_memory;

    /*
     * Included in MachineState for simplicity, but not supported
     * unless machine_add_audiodev_property is called. Boards
     * that have embedded audio devices can call it from the
     * machine init function and forward the property to the device.
     */
    char *audiodev;

    ram_addr_t ram_size;        /* initial RAM size */
    ram_addr_t maxram_size;     /* maximum RAM size (incl. hotpluggable) */
    uint64_t ram_slots;         /* number of hotpluggable memory slots */
    BootConfiguration boot_config;
    char *kernel_filename;
    char *kernel_cmdline;
    char *initrd_filename;
    const char *cpu_type;       /* QOM type name of the vCPUs */
    AccelState *accelerator;
    /* set of possible CPUs; see MachineClass::possible_cpu_arch_ids */
    CPUArchIdList *possible_cpus;
    CpuTopology smp;            /* parsed -smp topology */
    struct NVDIMMState *nvdimms_state;
    struct NumaState *numa_state;
};
|
|
|
|
|
2015-09-04 20:37:06 +02:00
|
|
|
/*
 * DEFINE_MACHINE:
 * @namestr: machine name string (without the "-machine" suffix)
 * @machine_initfn: function that fills in the MachineClass
 *
 * Registers a new machine type.  Expands to a class_init that calls
 * @machine_initfn on the MachineClass, a TypeInfo deriving from
 * TYPE_MACHINE named via MACHINE_TYPE_NAME(), and a type_init()
 * constructor that registers the type at startup.
 */
#define DEFINE_MACHINE(namestr, machine_initfn) \
    static void machine_initfn##_class_init(ObjectClass *oc, void *data) \
    { \
        MachineClass *mc = MACHINE_CLASS(oc); \
        machine_initfn(mc); \
    } \
    static const TypeInfo machine_initfn##_typeinfo = { \
        .name = MACHINE_TYPE_NAME(namestr), \
        .parent = TYPE_MACHINE, \
        .class_init = machine_initfn##_class_init, \
    }; \
    static void machine_initfn##_register_types(void) \
    { \
        type_register_static(&machine_initfn##_typeinfo); \
    } \
    type_init(machine_initfn##_register_types)
|
2015-09-04 20:37:06 +02:00
|
|
|
|
2023-07-18 16:22:35 +02:00
|
|
|
/*
 * Per-release compatibility property tables.  hw_compat_X_Y is the list
 * of global properties (with its _len element count, defined in
 * hw/core/machine.c) that presumably versioned machine types apply to
 * keep device behavior compatible with QEMU release X.Y -- NOTE(review):
 * consumers are outside this header; confirm against machine.c.
 */
extern GlobalProperty hw_compat_8_1[];
extern const size_t hw_compat_8_1_len;

extern GlobalProperty hw_compat_8_0[];
extern const size_t hw_compat_8_0_len;

extern GlobalProperty hw_compat_7_2[];
extern const size_t hw_compat_7_2_len;

extern GlobalProperty hw_compat_7_1[];
extern const size_t hw_compat_7_1_len;

extern GlobalProperty hw_compat_7_0[];
extern const size_t hw_compat_7_0_len;

extern GlobalProperty hw_compat_6_2[];
extern const size_t hw_compat_6_2_len;

extern GlobalProperty hw_compat_6_1[];
extern const size_t hw_compat_6_1_len;

extern GlobalProperty hw_compat_6_0[];
extern const size_t hw_compat_6_0_len;

extern GlobalProperty hw_compat_5_2[];
extern const size_t hw_compat_5_2_len;

extern GlobalProperty hw_compat_5_1[];
extern const size_t hw_compat_5_1_len;

extern GlobalProperty hw_compat_5_0[];
extern const size_t hw_compat_5_0_len;

extern GlobalProperty hw_compat_4_2[];
extern const size_t hw_compat_4_2_len;

extern GlobalProperty hw_compat_4_1[];
extern const size_t hw_compat_4_1_len;

extern GlobalProperty hw_compat_4_0[];
extern const size_t hw_compat_4_0_len;

extern GlobalProperty hw_compat_3_1[];
extern const size_t hw_compat_3_1_len;

extern GlobalProperty hw_compat_3_0[];
extern const size_t hw_compat_3_0_len;

extern GlobalProperty hw_compat_2_12[];
extern const size_t hw_compat_2_12_len;

extern GlobalProperty hw_compat_2_11[];
extern const size_t hw_compat_2_11_len;

extern GlobalProperty hw_compat_2_10[];
extern const size_t hw_compat_2_10_len;

extern GlobalProperty hw_compat_2_9[];
extern const size_t hw_compat_2_9_len;

extern GlobalProperty hw_compat_2_8[];
extern const size_t hw_compat_2_8_len;

extern GlobalProperty hw_compat_2_7[];
extern const size_t hw_compat_2_7_len;

extern GlobalProperty hw_compat_2_6[];
extern const size_t hw_compat_2_6_len;

extern GlobalProperty hw_compat_2_5[];
extern const size_t hw_compat_2_5_len;

extern GlobalProperty hw_compat_2_4[];
extern const size_t hw_compat_2_4_len;

extern GlobalProperty hw_compat_2_3[];
extern const size_t hw_compat_2_3_len;

extern GlobalProperty hw_compat_2_2[];
extern const size_t hw_compat_2_2_len;

extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
|
|
|
|
|
2007-11-17 18:14:51 +01:00
|
|
|
#endif
|