hw/arm/virt: Implement kvm-steal-time

We add the kvm-steal-time CPU property and implement it for machvirt.
A tiny bit of refactoring was also done to allow pmu and pvtime to
use the same vcpu device helper functions.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-id: 20201001061718.101915-7-drjones@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Andrew Jones 2020-10-01 08:17:18 +02:00 committed by Peter Maydell
parent 05889d15d1
commit 68970d1e0d
10 changed files with 209 additions and 13 deletions

View File

@ -200,6 +200,17 @@ the list of KVM VCPU features and their descriptions.
adjustment, also restoring the legacy (pre-5.0)
behavior.
kvm-steal-time Since v5.2, kvm-steal-time is enabled by
default when KVM is enabled, the feature is
supported, and the guest is 64-bit.
When kvm-steal-time is enabled a 64-bit guest
can account for time its CPUs were not running
due to the host not scheduling the corresponding
VCPU threads. The accounting statistics may
influence the guest scheduler behavior and/or be
exposed to the guest userspace.
SVE CPU Properties
==================

View File

@ -151,6 +151,7 @@ static const MemMapEntry base_memmap[] = {
[VIRT_PCDIMM_ACPI] = { 0x09070000, MEMORY_HOTPLUG_IO_LEN },
[VIRT_ACPI_GED] = { 0x09080000, ACPI_GED_EVT_SEL_LEN },
[VIRT_NVDIMM_ACPI] = { 0x09090000, NVDIMM_ACPI_IO_LEN},
[VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
[VIRT_MMIO] = { 0x0a000000, 0x00000200 },
/* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
[VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
@ -1666,15 +1667,40 @@ static void finalize_gic_version(VirtMachineState *vms)
* virt_cpu_post_init() must be called after the CPUs have
* been realized and the GIC has been created.
*/
static void virt_cpu_post_init(VirtMachineState *vms)
static void virt_cpu_post_init(VirtMachineState *vms, int max_cpus,
MemoryRegion *sysmem)
{
bool aarch64, pmu;
bool aarch64, pmu, steal_time;
CPUState *cpu;
aarch64 = object_property_get_bool(OBJECT(first_cpu), "aarch64", NULL);
pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL);
steal_time = object_property_get_bool(OBJECT(first_cpu),
"kvm-steal-time", NULL);
if (kvm_enabled()) {
hwaddr pvtime_reg_base = vms->memmap[VIRT_PVTIME].base;
hwaddr pvtime_reg_size = vms->memmap[VIRT_PVTIME].size;
if (steal_time) {
MemoryRegion *pvtime = g_new(MemoryRegion, 1);
hwaddr pvtime_size = max_cpus * PVTIME_SIZE_PER_CPU;
/* The memory region size must be a multiple of host page size. */
pvtime_size = REAL_HOST_PAGE_ALIGN(pvtime_size);
if (pvtime_size > pvtime_reg_size) {
error_report("pvtime requires a %" HWADDR_PRId
" byte memory region for %d CPUs,"
" but only %" HWADDR_PRId " has been reserved",
pvtime_size, max_cpus, pvtime_reg_size);
exit(1);
}
memory_region_init_ram(pvtime, NULL, "pvtime", pvtime_size, NULL);
memory_region_add_subregion(sysmem, pvtime_reg_base, pvtime);
}
CPU_FOREACH(cpu) {
if (pmu) {
assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU));
@ -1683,6 +1709,10 @@ static void virt_cpu_post_init(VirtMachineState *vms)
}
kvm_arm_pmu_init(cpu);
}
if (steal_time) {
kvm_arm_pvtime_init(cpu, pvtime_reg_base +
cpu->cpu_index * PVTIME_SIZE_PER_CPU);
}
}
} else {
if (aarch64 && vms->highmem) {
@ -1853,6 +1883,11 @@ static void machvirt_init(MachineState *machine)
object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL);
}
if (vmc->no_kvm_steal_time &&
object_property_find(cpuobj, "kvm-steal-time")) {
object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL);
}
if (vmc->no_pmu && object_property_find(cpuobj, "pmu")) {
object_property_set_bool(cpuobj, "pmu", false, NULL);
}
@ -1924,7 +1959,7 @@ static void machvirt_init(MachineState *machine)
create_gic(vms);
virt_cpu_post_init(vms);
virt_cpu_post_init(vms, possible_cpus->len, sysmem);
fdt_add_pmu_nodes(vms);
@ -2566,8 +2601,11 @@ DEFINE_VIRT_MACHINE_AS_LATEST(5, 2)
static void virt_machine_5_1_options(MachineClass *mc)
{
VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
virt_machine_5_2_options(mc);
compat_props_add(mc->compat_props, hw_compat_5_1, hw_compat_5_1_len);
vmc->no_kvm_steal_time = true;
}
DEFINE_VIRT_MACHINE(5, 1)

View File

@ -54,6 +54,9 @@
#define PPI(irq) ((irq) + 16)
/* See Linux kernel arch/arm64/include/asm/pvclock-abi.h */
#define PVTIME_SIZE_PER_CPU 64
enum {
VIRT_FLASH,
VIRT_MEM,
@ -81,6 +84,7 @@ enum {
VIRT_PCDIMM_ACPI,
VIRT_ACPI_GED,
VIRT_NVDIMM_ACPI,
VIRT_PVTIME,
VIRT_LOWMEMMAP_LAST,
};
@ -121,6 +125,7 @@ struct VirtMachineClass {
bool no_highmem_ecam;
bool no_ged; /* Machines < 4.2 has no support for ACPI GED device */
bool kvm_no_adjvtime;
bool no_kvm_steal_time;
bool acpi_expose_flash;
};

View File

@ -1310,6 +1310,14 @@ void arm_cpu_finalize_features(ARMCPU *cpu, Error **errp)
return;
}
}
if (kvm_enabled()) {
kvm_arm_steal_time_finalize(cpu, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
return;
}
}
}
static void arm_cpu_realizefn(DeviceState *dev, Error **errp)

View File

@ -24,6 +24,7 @@
#include "hw/registerfields.h"
#include "cpu-qom.h"
#include "exec/cpu-defs.h"
#include "qapi/qapi-types-common.h"
/* ARM processors have a weak memory model */
#define TCG_GUEST_DEFAULT_MO (0)
@ -863,6 +864,9 @@ struct ARMCPU {
bool kvm_vtime_dirty;
uint64_t kvm_vtime;
/* KVM steal time */
OnOffAuto kvm_steal_time;
/* Uniprocessor system with MP extensions */
bool mp_is_up;

View File

@ -192,6 +192,16 @@ static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp)
ARM_CPU(obj)->kvm_adjvtime = !value;
}
static bool kvm_steal_time_get(Object *obj, Error **errp)
{
return ARM_CPU(obj)->kvm_steal_time != ON_OFF_AUTO_OFF;
}
static void kvm_steal_time_set(Object *obj, bool value, Error **errp)
{
ARM_CPU(obj)->kvm_steal_time = value ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
}
/* KVM VCPU properties should be prefixed with "kvm-". */
void kvm_arm_add_vcpu_properties(Object *obj)
{
@ -207,6 +217,12 @@ void kvm_arm_add_vcpu_properties(Object *obj)
"the virtual counter. VM stopped time "
"will be counted.");
}
cpu->kvm_steal_time = ON_OFF_AUTO_AUTO;
object_property_add_bool(obj, "kvm-steal-time", kvm_steal_time_get,
kvm_steal_time_set);
object_property_set_description(obj, "kvm-steal-time",
"Set off to disable KVM steal time.");
}
bool kvm_arm_pmu_supported(void)

View File

@ -17,6 +17,7 @@
#include <linux/kvm.h>
#include "qemu-common.h"
#include "qapi/error.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "qemu/error-report.h"
@ -397,19 +398,20 @@ static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
return NULL;
}
static bool kvm_arm_pmu_set_attr(CPUState *cs, struct kvm_device_attr *attr)
static bool kvm_arm_set_device_attr(CPUState *cs, struct kvm_device_attr *attr,
const char *name)
{
int err;
err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
if (err != 0) {
error_report("PMU: KVM_HAS_DEVICE_ATTR: %s", strerror(-err));
error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err));
return false;
}
err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
if (err != 0) {
error_report("PMU: KVM_SET_DEVICE_ATTR: %s", strerror(-err));
error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err));
return false;
}
@ -426,7 +428,7 @@ void kvm_arm_pmu_init(CPUState *cs)
if (!ARM_CPU(cs)->has_pmu) {
return;
}
if (!kvm_arm_pmu_set_attr(cs, &attr)) {
if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) {
error_report("failed to init PMU");
abort();
}
@ -443,12 +445,29 @@ void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
if (!ARM_CPU(cs)->has_pmu) {
return;
}
if (!kvm_arm_pmu_set_attr(cs, &attr)) {
if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) {
error_report("failed to set irq for PMU");
abort();
}
}
void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa)
{
struct kvm_device_attr attr = {
.group = KVM_ARM_VCPU_PVTIME_CTRL,
.attr = KVM_ARM_VCPU_PVTIME_IPA,
.addr = (uint64_t)&ipa,
};
if (ARM_CPU(cs)->kvm_steal_time == ON_OFF_AUTO_OFF) {
return;
}
if (!kvm_arm_set_device_attr(cs, &attr, "PVTIME IPA")) {
error_report("failed to init PVTIME IPA");
abort();
}
}
static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
{
uint64_t ret;
@ -655,6 +674,36 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
return true;
}
void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
{
bool has_steal_time = kvm_arm_steal_time_supported();
if (cpu->kvm_steal_time == ON_OFF_AUTO_AUTO) {
if (!has_steal_time || !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
cpu->kvm_steal_time = ON_OFF_AUTO_OFF;
} else {
cpu->kvm_steal_time = ON_OFF_AUTO_ON;
}
} else if (cpu->kvm_steal_time == ON_OFF_AUTO_ON) {
if (!has_steal_time) {
error_setg(errp, "'kvm-steal-time' cannot be enabled "
"on this host");
return;
} else if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
/*
* DEN0057A chapter 2 says "This specification only covers
* systems in which the Execution state of the hypervisor
* as well as EL1 of virtual machines is AArch64.". And,
* to ensure that, the smc/hvc calls are only specified as
* smc64/hvc64.
*/
error_setg(errp, "'kvm-steal-time' cannot be enabled "
"for AArch32 guests");
return;
}
}
}
bool kvm_arm_aarch32_supported(void)
{
return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT);
@ -665,6 +714,11 @@ bool kvm_arm_sve_supported(void)
return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
}
bool kvm_arm_steal_time_supported(void)
{
return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME);
}
QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)

View File

@ -267,6 +267,24 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu);
*/
void kvm_arm_add_vcpu_properties(Object *obj);
/**
* kvm_arm_steal_time_finalize:
* @cpu: ARMCPU for which to finalize kvm-steal-time
* @errp: Pointer to Error* for error propagation
*
* Validate the kvm-steal-time property selection and set its default
* based on KVM support and guest configuration.
*/
void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp);
/**
* kvm_arm_steal_time_supported:
*
* Returns: true if KVM can enable steal time reporting
* and false otherwise.
*/
bool kvm_arm_steal_time_supported(void);
/**
* kvm_arm_aarch32_supported:
*
@ -340,6 +358,16 @@ int kvm_arm_vgic_probe(void);
void kvm_arm_pmu_set_irq(CPUState *cs, int irq);
void kvm_arm_pmu_init(CPUState *cs);
/**
* kvm_arm_pvtime_init:
* @cs: CPUState
* @ipa: Per-vcpu guest physical base address of the pvtime structures
*
* Initializes PVTIME for the VCPU, setting the PVTIME IPA to @ipa.
*/
void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa);
int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level);
#else
@ -363,6 +391,11 @@ static inline bool kvm_arm_sve_supported(void)
return false;
}
static inline bool kvm_arm_steal_time_supported(void)
{
return false;
}
/*
* These functions should never actually be called without KVM support.
*/
@ -396,6 +429,16 @@ static inline void kvm_arm_pmu_init(CPUState *cs)
g_assert_not_reached();
}
static inline void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa)
{
g_assert_not_reached();
}
static inline void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
{
g_assert_not_reached();
}
static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
{
g_assert_not_reached();

View File

@ -103,7 +103,7 @@ static const char *cpu_model_advertised_features[] = {
"sve128", "sve256", "sve384", "sve512",
"sve640", "sve768", "sve896", "sve1024", "sve1152", "sve1280",
"sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048",
"kvm-no-adjvtime",
"kvm-no-adjvtime", "kvm-steal-time",
NULL
};

View File

@ -452,6 +452,7 @@ static void test_query_cpu_model_expansion(const void *data)
assert_set_feature(qts, "max", "pmu", true);
assert_has_not_feature(qts, "max", "kvm-no-adjvtime");
assert_has_not_feature(qts, "max", "kvm-steal-time");
if (g_str_equal(qtest_get_arch(), "aarch64")) {
assert_has_feature_enabled(qts, "max", "aarch64");
@ -493,6 +494,7 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
assert_set_feature(qts, "host", "kvm-no-adjvtime", false);
if (g_str_equal(qtest_get_arch(), "aarch64")) {
bool kvm_supports_steal_time;
bool kvm_supports_sve;
char max_name[8], name[8];
uint32_t max_vq, vq;
@ -500,6 +502,10 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
QDict *resp;
char *error;
assert_error(qts, "cortex-a15",
"We cannot guarantee the CPU type 'cortex-a15' works "
"with KVM on this host", NULL);
assert_has_feature_enabled(qts, "host", "aarch64");
/* Enabling and disabling pmu should always work. */
@ -507,16 +513,26 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
assert_set_feature(qts, "host", "pmu", false);
assert_set_feature(qts, "host", "pmu", true);
assert_error(qts, "cortex-a15",
"We cannot guarantee the CPU type 'cortex-a15' works "
"with KVM on this host", NULL);
/*
* Some features would be enabled by default, but they're disabled
* because this instance of KVM doesn't support them. Test that the
* features are present, and, when enabled, issue further tests.
*/
assert_has_feature(qts, "host", "kvm-steal-time");
assert_has_feature(qts, "host", "sve");
resp = do_query_no_props(qts, "host");
kvm_supports_steal_time = resp_get_feature(resp, "kvm-steal-time");
kvm_supports_sve = resp_get_feature(resp, "sve");
vls = resp_get_sve_vls(resp);
qobject_unref(resp);
if (kvm_supports_steal_time) {
/* If we have steal-time then we should be able to toggle it. */
assert_set_feature(qts, "host", "kvm-steal-time", false);
assert_set_feature(qts, "host", "kvm-steal-time", true);
}
if (kvm_supports_sve) {
g_assert(vls != 0);
max_vq = 64 - __builtin_clzll(vls);
@ -577,6 +593,7 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
assert_has_not_feature(qts, "host", "aarch64");
assert_has_not_feature(qts, "host", "pmu");
assert_has_not_feature(qts, "host", "sve");
assert_has_not_feature(qts, "host", "kvm-steal-time");
}
qtest_quit(qts);