From 24f7973b67dd6036f0c193a92d265722911a620a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 24 Aug 2018 17:03:41 +0200 Subject: [PATCH 01/47] es1370: more fixes for ADC_FRAMEADR and ADC_FRAMECNT They are not consecutive with DAC1_FRAME* and DAC2_FRAME*; Coverity still complains about es1370_read, while es1370_write was fixed in commit cf9270e5220671f49cc238deaf6136669cc07ae1. Fixes: 154c1d1f960c5147a3f8ef00907504112f271cd8 Signed-off-by: Paolo Bonzini --- hw/audio/es1370.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c index 4f980a598b..97789a0771 100644 --- a/hw/audio/es1370.c +++ b/hw/audio/es1370.c @@ -585,10 +585,13 @@ static uint64_t es1370_read(void *opaque, hwaddr addr, unsigned size) #endif break; + case ES1370_REG_ADC_FRAMECNT: + d += 2; + goto framecnt; case ES1370_REG_DAC1_FRAMECNT: case ES1370_REG_DAC2_FRAMECNT: - case ES1370_REG_ADC_FRAMECNT: d += (addr - ES1370_REG_DAC1_FRAMECNT) >> 3; + framecnt: val = d->frame_cnt; #ifdef DEBUG_ES1370 { @@ -602,10 +605,13 @@ static uint64_t es1370_read(void *opaque, hwaddr addr, unsigned size) #endif break; + case ES1370_REG_ADC_FRAMEADR: + d += 2; + goto frameadr; case ES1370_REG_DAC1_FRAMEADR: case ES1370_REG_DAC2_FRAMEADR: - case ES1370_REG_ADC_FRAMEADR: d += (addr - ES1370_REG_DAC1_FRAMEADR) >> 3; + frameadr: val = d->frame_addr; break; From 05ff8dc32fa124e7bbf77a257f863f3685c7be9d Mon Sep 17 00:00:00 2001 From: Artem Pisarenko Date: Wed, 17 Oct 2018 14:24:18 +0600 Subject: [PATCH 02/47] Revert some patches from recent [PATCH v6] "Fixing record/replay and adding reverse debugging" That patch series introduced new virtual clock type for use in external subsystems. It breaks desired behavior in non-record/replay usage scenarios due to a small change to existing behavior. 
Processing of virtual timers belonging to new clock type is kicked off to the main loop, which makes these timers asynchronous with vCPU thread and, in icount mode, with whole guest execution. This breaks expected determinism in non-record/replay icount mode of emulation where these "external subsystems" are isolated from the host (i.e. they are external only to guest core, not to the entire emulation environment). Example for slirp ("user" backend for network device): User runs qemu in icount mode with rtc clock=vm without any external communication interfaces but with "-netdev user,restrict=on". It expects deterministic execution, because network services are emulated inside qemu and isolated from host. There are no reasons to get reply from DHCP server with different delay or something like that. The next patches reimplement the same changes in a better way. This reverts commit 87f4fe7653baf55b5c2f2753fe6003f473c07342. This reverts commit 775a412bf83f6bc0c5c02091ee06cf649b34c593. This reverts commit 9888091404a702d7ec79d51b088d994b9fc121bd. Signed-off-by: Artem Pisarenko Message-Id: <18b1e7c8f155fe26976f91be06bde98eef6f8751.1539764043.git.artem.k.pisarenko@gmail.com> Signed-off-by: Paolo Bonzini --- include/qemu/timer.h | 9 --------- slirp/ip6_icmp.c | 7 +++---- ui/input.c | 8 ++++---- util/qemu-timer.c | 2 -- 4 files changed, 7 insertions(+), 19 deletions(-) diff --git a/include/qemu/timer.h b/include/qemu/timer.h index a005ed2692..39ea907e65 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -42,14 +42,6 @@ * In icount mode, this clock counts nanoseconds while the virtual * machine is running. It is used to increase @QEMU_CLOCK_VIRTUAL * while the CPUs are sleeping and thus not executing instructions. - * - * @QEMU_CLOCK_VIRTUAL_EXT: virtual clock for external subsystems - * - * The virtual clock only runs during the emulation. It stops - * when the virtual machine is stopped.
The timers for this clock - * do not recorded in rr mode, therefore this clock could be used - * for the subsystems that operate outside the guest core. - * */ typedef enum { @@ -57,7 +49,6 @@ typedef enum { QEMU_CLOCK_VIRTUAL = 1, QEMU_CLOCK_HOST = 2, QEMU_CLOCK_VIRTUAL_RT = 3, - QEMU_CLOCK_VIRTUAL_EXT = 4, QEMU_CLOCK_MAX } QEMUClockType; diff --git a/slirp/ip6_icmp.c b/slirp/ip6_icmp.c index 3f41187cfe..ee333d05a2 100644 --- a/slirp/ip6_icmp.c +++ b/slirp/ip6_icmp.c @@ -17,7 +17,7 @@ static void ra_timer_handler(void *opaque) { Slirp *slirp = opaque; timer_mod(slirp->ra_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_EXT) + NDP_Interval); + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NDP_Interval); ndp_send_ra(slirp); } @@ -27,10 +27,9 @@ void icmp6_init(Slirp *slirp) return; } - slirp->ra_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_EXT, - ra_timer_handler, slirp); + slirp->ra_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, ra_timer_handler, slirp); timer_mod(slirp->ra_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_EXT) + NDP_Interval); + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NDP_Interval); } void icmp6_cleanup(Slirp *slirp) diff --git a/ui/input.c b/ui/input.c index dd7f6d7f21..51b1019252 100644 --- a/ui/input.c +++ b/ui/input.c @@ -271,7 +271,7 @@ static void qemu_input_queue_process(void *opaque) item = QTAILQ_FIRST(queue); switch (item->type) { case QEMU_INPUT_QUEUE_DELAY: - timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_EXT) + timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + item->delay_ms); return; case QEMU_INPUT_QUEUE_EVENT: @@ -301,7 +301,7 @@ static void qemu_input_queue_delay(struct QemuInputEventQueueHead *queue, queue_count++; if (start_timer) { - timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_EXT) + timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + item->delay_ms); } } @@ -448,8 +448,8 @@ void qemu_input_event_send_key_delay(uint32_t delay_ms) } if (!kbd_timer) { - kbd_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_EXT, 
- qemu_input_queue_process, &kbd_queue); + kbd_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, qemu_input_queue_process, + &kbd_queue); } if (queue_count < queue_limit) { qemu_input_queue_delay(&kbd_queue, kbd_timer, diff --git a/util/qemu-timer.c b/util/qemu-timer.c index eb60d8f73a..86bfe84037 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -496,7 +496,6 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) switch (timer_list->clock->type) { case QEMU_CLOCK_REALTIME: - case QEMU_CLOCK_VIRTUAL_EXT: break; default: case QEMU_CLOCK_VIRTUAL: @@ -598,7 +597,6 @@ int64_t qemu_clock_get_ns(QEMUClockType type) return get_clock(); default: case QEMU_CLOCK_VIRTUAL: - case QEMU_CLOCK_VIRTUAL_EXT: if (use_icount) { return cpu_get_icount(); } else { From 89a603a0c80ae3d6a8711571550b2ae9a01ea909 Mon Sep 17 00:00:00 2001 From: Artem Pisarenko Date: Wed, 17 Oct 2018 14:24:19 +0600 Subject: [PATCH 03/47] qemu-timer: introduce timer attributes Attributes are simple flags, associated with individual timers for their whole lifetime. They are intended to be used to mark individual timers for special handling when they fire. The new/init function family in the timer interface is updated and refactored (new 'attribute' argument added, timer_list replaced with timer_list_group+type combinations, comments improved to avoid info duplication). The existing aio interface is also extended with attribute-enabled variants of the functions which create/initialize timers.
Signed-off-by: Artem Pisarenko Message-Id: Signed-off-by: Paolo Bonzini --- include/block/aio.h | 59 ++++++++++++++++++--- include/qemu/timer.h | 109 +++++++++++++++++++------------------- tests/ptimer-test-stubs.c | 13 +++-- util/qemu-timer.c | 13 +++-- 4 files changed, 124 insertions(+), 70 deletions(-) diff --git a/include/block/aio.h b/include/block/aio.h index f08630c6e5..0ca25dfec6 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -387,6 +387,32 @@ struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp); /* Return the LinuxAioState bound to this AioContext */ struct LinuxAioState *aio_get_linux_aio(AioContext *ctx); +/** + * aio_timer_new_with_attrs: + * @ctx: the aio context + * @type: the clock type + * @scale: the scale + * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_ values + * to assign + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Allocate a new timer (with attributes) attached to the context @ctx. + * The function is responsible for memory allocation. + * + * The preferred interface is aio_timer_init or aio_timer_init_with_attrs. + * Use that unless you really need dynamic memory allocation. + * + * Returns: a pointer to the new timer + */ +static inline QEMUTimer *aio_timer_new_with_attrs(AioContext *ctx, + QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) +{ + return timer_new_full(&ctx->tlg, type, scale, attributes, cb, opaque); +} + /** * aio_timer_new: * @ctx: the aio context @@ -396,10 +422,7 @@ struct LinuxAioState *aio_get_linux_aio(AioContext *ctx); * @opaque: the opaque pointer to pass to the callback * * Allocate a new timer attached to the context @ctx. - * The function is responsible for memory allocation. - * - * The preferred interface is aio_timer_init. Use that - * unless you really need dynamic memory allocation. + * See aio_timer_new_with_attrs for details. 
* * Returns: a pointer to the new timer */ @@ -407,7 +430,29 @@ static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque); + return timer_new_full(&ctx->tlg, type, scale, 0, cb, opaque); +} + +/** + * aio_timer_init_with_attrs: + * @ctx: the aio context + * @ts: the timer + * @type: the clock type + * @scale: the scale + * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_ values + * to assign + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Initialise a new timer (with attributes) attached to the context @ctx. + * The caller is responsible for memory allocation. + */ +static inline void aio_timer_init_with_attrs(AioContext *ctx, + QEMUTimer *ts, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) +{ + timer_init_full(ts, &ctx->tlg, type, scale, attributes, cb, opaque); } /** @@ -420,14 +465,14 @@ static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, * @opaque: the opaque pointer to pass to the callback * * Initialise a new timer attached to the context @ctx. - * The caller is responsible for memory allocation. + * See aio_timer_init_with_attrs for details. 
*/ static inline void aio_timer_init(AioContext *ctx, QEMUTimer *ts, QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque); + timer_init_full(ts, &ctx->tlg, type, scale, 0, cb, opaque); } /** diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 39ea907e65..8ff1092f14 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -2,6 +2,7 @@ #define QEMU_TIMER_H #include "qemu-common.h" +#include "qemu/bitops.h" #include "qemu/notify.h" #include "qemu/host-utils.h" @@ -52,6 +53,16 @@ typedef enum { QEMU_CLOCK_MAX } QEMUClockType; +/** + * QEMU Timer attributes: + * + * An individual timer may be given one or multiple attributes when initialized. + * Each attribute corresponds to one bit. Attributes modify the processing + * of timers when they fire. + * + * No attributes defined currently. + */ + typedef struct QEMUTimerList QEMUTimerList; struct QEMUTimerListGroup { @@ -67,6 +78,7 @@ struct QEMUTimer { QEMUTimerCB *cb; void *opaque; QEMUTimer *next; + int attributes; int scale; }; @@ -418,22 +430,27 @@ int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg); */ /** - * timer_init_tl: + * timer_init_full: * @ts: the timer to be initialised - * @timer_list: the timer list to attach the timer to + * @timer_list_group: (optional) the timer list group to attach the timer to + * @type: the clock type to use * @scale: the scale value for the timer + * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_ values * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * - * Initialise a new timer and associate it with @timer_list. + * Initialise a timer with the given scale and attributes, + * and associate it with timer list for given clock @type in @timer_list_group + * (or default timer list group, if NULL). * The caller is responsible for allocating the memory. * * You need not call an explicit deinit call. 
Simply make * sure it is not on a list with timer_del. */ -void timer_init_tl(QEMUTimer *ts, - QEMUTimerList *timer_list, int scale, - QEMUTimerCB *cb, void *opaque); +void timer_init_full(QEMUTimer *ts, + QEMUTimerListGroup *timer_list_group, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque); /** * timer_init: @@ -445,14 +462,12 @@ void timer_init_tl(QEMUTimer *ts, * * Initialize a timer with the given scale on the default timer list * associated with the clock. - * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init(QEMUTimer *ts, QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - timer_init_tl(ts, main_loop_tlg.tl[type], scale, cb, opaque); + timer_init_full(ts, NULL, type, scale, 0, cb, opaque); } /** @@ -464,9 +479,7 @@ static inline void timer_init(QEMUTimer *ts, QEMUClockType type, int scale, * * Initialize a timer with nanosecond scale on the default timer list * associated with the clock. - * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init_ns(QEMUTimer *ts, QEMUClockType type, QEMUTimerCB *cb, void *opaque) @@ -483,9 +496,7 @@ static inline void timer_init_ns(QEMUTimer *ts, QEMUClockType type, * * Initialize a timer with microsecond scale on the default timer list * associated with the clock. - * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init_us(QEMUTimer *ts, QEMUClockType type, QEMUTimerCB *cb, void *opaque) @@ -502,9 +513,7 @@ static inline void timer_init_us(QEMUTimer *ts, QEMUClockType type, * * Initialize a timer with millisecond scale on the default timer list * associated with the clock. 
- * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init_ms(QEMUTimer *ts, QEMUClockType type, QEMUTimerCB *cb, void *opaque) @@ -513,27 +522,37 @@ static inline void timer_init_ms(QEMUTimer *ts, QEMUClockType type, } /** - * timer_new_tl: - * @timer_list: the timer list to attach the timer to + * timer_new_full: + * @timer_list_group: (optional) the timer list group to attach the timer to + * @type: the clock type to use * @scale: the scale value for the timer + * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_ values * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * - * Create a new timer and associate it with @timer_list. + * Create a new timer with the given scale and attributes, + * and associate it with timer list for given clock @type in @timer_list_group + * (or default timer list group, if NULL). * The memory is allocated by the function. * * This is not the preferred interface unless you know you - * are going to call timer_free. Use timer_init instead. + * are going to call timer_free. Use timer_init or timer_init_full instead. + * + * The default timer list has one special feature: in icount mode, + * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is + * not true of other timer lists, which are typically associated + * with an AioContext---each of them runs its timer callbacks in its own + * AioContext thread. 
* * Returns: a pointer to the timer */ -static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list, - int scale, - QEMUTimerCB *cb, - void *opaque) +static inline QEMUTimer *timer_new_full(QEMUTimerListGroup *timer_list_group, + QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) { QEMUTimer *ts = g_malloc0(sizeof(QEMUTimer)); - timer_init_tl(ts, timer_list, scale, cb, opaque); + timer_init_full(ts, timer_list_group, type, scale, attributes, cb, opaque); return ts; } @@ -544,21 +563,16 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list, * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * - * Create a new timer and associate it with the default - * timer list for the clock type @type. - * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. + * Create a new timer with the given scale, + * and associate it with the default timer list for the clock type @type. + * See timer_new_full for details. * * Returns: a pointer to the timer */ static inline QEMUTimer *timer_new(QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - return timer_new_tl(main_loop_tlg.tl[type], scale, cb, opaque); + return timer_new_full(NULL, type, scale, 0, cb, opaque); } /** @@ -569,12 +583,7 @@ static inline QEMUTimer *timer_new(QEMUClockType type, int scale, * * Create a new timer with nanosecond scale on the default timer list * associated with the clock. - * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. 
This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. + * See timer_new_full for details. * * Returns: a pointer to the newly created timer */ @@ -590,14 +599,9 @@ static inline QEMUTimer *timer_new_ns(QEMUClockType type, QEMUTimerCB *cb, * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. - * * Create a new timer with microsecond scale on the default timer list * associated with the clock. + * See timer_new_full for details. * * Returns: a pointer to the newly created timer */ @@ -613,14 +617,9 @@ static inline QEMUTimer *timer_new_us(QEMUClockType type, QEMUTimerCB *cb, * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. - * * Create a new timer with millisecond scale on the default timer list * associated with the clock. + * See timer_new_full for details. 
* * Returns: a pointer to the newly created timer */ diff --git a/tests/ptimer-test-stubs.c b/tests/ptimer-test-stubs.c index ca5cc3b13b..54b3fd26f6 100644 --- a/tests/ptimer-test-stubs.c +++ b/tests/ptimer-test-stubs.c @@ -34,14 +34,19 @@ int64_t ptimer_test_time_ns; int use_icount = 1; bool qtest_allowed; -void timer_init_tl(QEMUTimer *ts, - QEMUTimerList *timer_list, int scale, - QEMUTimerCB *cb, void *opaque) +void timer_init_full(QEMUTimer *ts, + QEMUTimerListGroup *timer_list_group, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) { - ts->timer_list = timer_list; + if (!timer_list_group) { + timer_list_group = &main_loop_tlg; + } + ts->timer_list = timer_list_group->tl[type]; ts->cb = cb; ts->opaque = opaque; ts->scale = scale; + ts->attributes = attributes; ts->expire_time = -1; } diff --git a/util/qemu-timer.c b/util/qemu-timer.c index 86bfe84037..04527a343f 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -339,14 +339,19 @@ int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout) } -void timer_init_tl(QEMUTimer *ts, - QEMUTimerList *timer_list, int scale, - QEMUTimerCB *cb, void *opaque) +void timer_init_full(QEMUTimer *ts, + QEMUTimerListGroup *timer_list_group, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) { - ts->timer_list = timer_list; + if (!timer_list_group) { + timer_list_group = &main_loop_tlg; + } + ts->timer_list = timer_list_group->tl[type]; ts->cb = cb; ts->opaque = opaque; ts->scale = scale; + ts->attributes = attributes; ts->expire_time = -1; } From e81f86790f561437b70549aff05433731b464e62 Mon Sep 17 00:00:00 2001 From: Artem Pisarenko Date: Wed, 17 Oct 2018 14:24:20 +0600 Subject: [PATCH 04/47] qemu-timer: avoid checkpoints for virtual clock timers in external subsystems Adds EXTERNAL attribute definition to qemu timers subsystem and assigns it to virtual clock timers, used in slirp (ICMP IPv6) and ui (key queue). 
Virtual clock processing in rr mode can use this attribute instead of a separate clock type. Fixes: 87f4fe7653baf55b5c2f2753fe6003f473c07342 Fixes: 775a412bf83f6bc0c5c02091ee06cf649b34c593 Fixes: 9888091404a702d7ec79d51b088d994b9fc121bd Signed-off-by: Artem Pisarenko Message-Id: Signed-off-by: Paolo Bonzini --- include/qemu/timer.h | 10 ++++++++- slirp/ip6_icmp.c | 4 +++- ui/input.c | 5 +++-- util/qemu-timer.c | 50 +++++++++++++++++++++++++++++++++++++------- 4 files changed, 58 insertions(+), 11 deletions(-) diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 8ff1092f14..9f37c92bd1 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -60,9 +60,17 @@ typedef enum { * Each attribute corresponds to one bit. Attributes modify the processing * of timers when they fire. * - * No attributes defined currently. + * The following attributes are available: + * + * QEMU_TIMER_ATTR_EXTERNAL: drives external subsystem + * + * Timers with this attribute are not recorded in rr mode, therefore it can be + * used for the subsystems that operate outside the guest core. Applicable only + * with virtual clock type.
*/ +#define QEMU_TIMER_ATTR_EXTERNAL BIT(0) + typedef struct QEMUTimerList QEMUTimerList; struct QEMUTimerListGroup { diff --git a/slirp/ip6_icmp.c b/slirp/ip6_icmp.c index ee333d05a2..cd1e0b9fe1 100644 --- a/slirp/ip6_icmp.c +++ b/slirp/ip6_icmp.c @@ -27,7 +27,9 @@ void icmp6_init(Slirp *slirp) return; } - slirp->ra_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, ra_timer_handler, slirp); + slirp->ra_timer = timer_new_full(NULL, QEMU_CLOCK_VIRTUAL, + SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL, + ra_timer_handler, slirp); timer_mod(slirp->ra_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NDP_Interval); } diff --git a/ui/input.c b/ui/input.c index 51b1019252..7c9a4109c4 100644 --- a/ui/input.c +++ b/ui/input.c @@ -448,8 +448,9 @@ void qemu_input_event_send_key_delay(uint32_t delay_ms) } if (!kbd_timer) { - kbd_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, qemu_input_queue_process, - &kbd_queue); + kbd_timer = timer_new_full(NULL, QEMU_CLOCK_VIRTUAL, + SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL, + qemu_input_queue_process, &kbd_queue); } if (queue_count < queue_limit) { qemu_input_queue_delay(&kbd_queue, kbd_timer, diff --git a/util/qemu-timer.c b/util/qemu-timer.c index 04527a343f..1cc1b2f2c3 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -489,6 +489,7 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) bool progress = false; QEMUTimerCB *cb; void *opaque; + bool need_replay_checkpoint = false; if (!atomic_read(&timer_list->active_timers)) { return false; @@ -504,8 +505,15 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) break; default: case QEMU_CLOCK_VIRTUAL: - if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) { - goto out; + if (replay_mode != REPLAY_MODE_NONE) { + /* Checkpoint for virtual clock is redundant in cases where + * it's being triggered with only non-EXTERNAL timers, because + * these timers don't change guest state directly. + * Since it has conditional dependence on specific timers, it is + * subject to race conditions and requires special handling. 
+ * See below. + */ + need_replay_checkpoint = true; } break; case QEMU_CLOCK_HOST: @@ -520,14 +528,39 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) break; } + /* + * Extract expired timers from active timers list and process them. + * + * In rr mode we need "filtered" checkpointing for virtual clock. The + * checkpoint must be recorded/replayed before processing any non-EXTERNAL timer, + * and that must only be done once since the clock value stays the same. Because + * non-EXTERNAL timers may appear in the timers list while it is being processed, + * the checkpoint can be issued at any time until no timers are left and we are + * done. + */ current_time = qemu_clock_get_ns(timer_list->clock->type); - for(;;) { - qemu_mutex_lock(&timer_list->active_timers_lock); - ts = timer_list->active_timers; + qemu_mutex_lock(&timer_list->active_timers_lock); + while ((ts = timer_list->active_timers)) { if (!timer_expired_ns(ts, current_time)) { - qemu_mutex_unlock(&timer_list->active_timers_lock); + /* No expired timers left. The checkpoint can be skipped + * if no timers fired or they were all external. + */ break; } + if (need_replay_checkpoint + && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) { + /* once we got here, checkpoint clock only once */ + need_replay_checkpoint = false; + qemu_mutex_unlock(&timer_list->active_timers_lock); + if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) { + goto out; + } + qemu_mutex_lock(&timer_list->active_timers_lock); + /* The lock was released; start over again in case the list was + * modified.
+ */ + continue; + } /* remove timer from the list before calling the callback */ timer_list->active_timers = ts->next; @@ -535,12 +568,15 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) ts->expire_time = -1; cb = ts->cb; opaque = ts->opaque; - qemu_mutex_unlock(&timer_list->active_timers_lock); /* run the callback (the timer list can be modified) */ + qemu_mutex_unlock(&timer_list->active_timers_lock); cb(opaque); + qemu_mutex_lock(&timer_list->active_timers_lock); + progress = true; } + qemu_mutex_unlock(&timer_list->active_timers_lock); out: qemu_event_set(&timer_list->timers_done_ev); From b31c003895b030bea1319037d6bec976d47d9020 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2018 14:35:23 +0200 Subject: [PATCH 05/47] target-i386: kvm: do not initialize padding fields The exception.pad field is going to be renamed to pending in an upcoming header file update. Remove the unnecessary initialization; it was introduced to please valgrind (commit 7e680753cfa2) but they were later rendered unnecessary by commit 076796f8fd27f4d, which added the "= {}" initializer to the declaration of "events". Therefore the patch does not change behavior in any way. 
Reviewed-by: Peter Maydell Signed-off-by: Paolo Bonzini --- target/i386/kvm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index dc4047b02f..302f420064 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -2686,7 +2686,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.exception.nr = env->exception_injected; events.exception.has_error_code = env->has_error_code; events.exception.error_code = env->error_code; - events.exception.pad = 0; events.interrupt.injected = (env->interrupt_injected >= 0); events.interrupt.nr = env->interrupt_injected; @@ -2695,7 +2694,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.nmi.injected = env->nmi_injected; events.nmi.pending = env->nmi_pending; events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK); - events.nmi.pad = 0; events.sipi_vector = env->sipi_vector; events.flags = 0; From 966f2ec3ac854275c2ab0747785a4a91ef832eaf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 18 Oct 2018 14:37:10 +0200 Subject: [PATCH 06/47] linux-headers: update to 4.20-rc1 This brings in eVMCS and coalesced PIO support, as well as other features we do not support yet. 
Signed-off-by: Paolo Bonzini --- linux-headers/asm-powerpc/kvm.h | 1 + linux-headers/asm-x86/kvm.h | 8 ++++++-- linux-headers/linux/kvm.h | 16 ++++++++++++++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 1b32b56a03..8c876c166e 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) +#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index fd23d5778e..dabfcf7c39 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 pending; __u32 error_code; } exception; struct { @@ -322,7 +323,9 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u8 reserved[27]; + __u8 exception_has_payload; + __u64 exception_payload; }; /* for KVM_GET/SET_DEBUGREGS */ @@ -381,6 +384,7 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 +#define KVM_STATE_NESTED_EVMCS 0x00000004 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 83ba4eb571..f11a7eb49c 100644 --- 
a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -420,13 +420,19 @@ struct kvm_run { struct kvm_coalesced_mmio_zone { __u64 addr; __u32 size; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; }; struct kvm_coalesced_mmio { __u64 phys_addr; __u32 len; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; __u8 data[8]; }; @@ -719,6 +725,7 @@ struct kvm_ppc_one_seg_page_size { #define KVM_PPC_PAGE_SIZES_REAL 0x00000001 #define KVM_PPC_1T_SEGMENTS 0x00000002 +#define KVM_PPC_NO_HASH 0x00000004 struct kvm_ppc_smmu_info { __u64 flags; @@ -953,6 +960,11 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 #define KVM_CAP_MSR_PLATFORM_INFO 159 +#define KVM_CAP_PPC_NESTED_HV 160 +#define KVM_CAP_HYPERV_SEND_IPI 161 +#define KVM_CAP_COALESCED_PIO 162 +#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 +#define KVM_CAP_EXCEPTION_PAYLOAD 164 #ifdef KVM_CAP_IRQ_ROUTING From e6d34aeea6aad52b01956d9cc29e81887939b9ea Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Thu, 18 Oct 2018 00:52:54 +0800 Subject: [PATCH 07/47] target-i386 : add coalesced_pio API the primary API realization. 
Signed-off-by: Peng Hao Reviewed-by: Eduardo Habkost Message-Id: <1539795177-21038-3-git-send-email-peng.hao2@zte.com.cn> Signed-off-by: Paolo Bonzini --- accel/kvm/kvm-all.c | 56 ++++++++++++++++++++++++++++++++++++++++--- include/exec/memory.h | 4 ++-- memory.c | 4 ++-- 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index de12f78eb8..4880a05399 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -79,6 +79,7 @@ struct KVMState int fd; int vmfd; int coalesced_mmio; + int coalesced_pio; struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; bool coalesced_flush_in_progress; int vcpu_events; @@ -560,6 +561,45 @@ static void kvm_uncoalesce_mmio_region(MemoryListener *listener, } } +static void kvm_coalesce_pio_add(MemoryListener *listener, + MemoryRegionSection *section, + hwaddr start, hwaddr size) +{ + KVMState *s = kvm_state; + + if (s->coalesced_pio) { + struct kvm_coalesced_mmio_zone zone; + + zone.addr = start; + zone.size = size; + zone.pio = 1; + + (void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone); + } +} + +static void kvm_coalesce_pio_del(MemoryListener *listener, + MemoryRegionSection *section, + hwaddr start, hwaddr size) +{ + KVMState *s = kvm_state; + + if (s->coalesced_pio) { + struct kvm_coalesced_mmio_zone zone; + + zone.addr = start; + zone.size = size; + zone.pio = 1; + + (void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone); + } +} + +static MemoryListener kvm_coalesced_pio_listener = { + .coalesced_io_add = kvm_coalesce_pio_add, + .coalesced_io_del = kvm_coalesce_pio_del, +}; + int kvm_check_extension(KVMState *s, unsigned int extension) { int ret; @@ -1616,6 +1656,8 @@ static int kvm_init(MachineState *ms) } s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO); + s->coalesced_pio = s->coalesced_mmio && + kvm_check_extension(s, KVM_CAP_COALESCED_PIO); #ifdef KVM_CAP_VCPU_EVENTS s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS); @@ -1686,13 
+1728,15 @@ static int kvm_init(MachineState *ms) s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add; s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del; } - s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region; - s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region; + s->memory_listener.listener.coalesced_io_add = kvm_coalesce_mmio_region; + s->memory_listener.listener.coalesced_io_del = kvm_uncoalesce_mmio_region; kvm_memory_listener_register(s, &s->memory_listener, &address_space_memory, 0); memory_listener_register(&kvm_io_listener, &address_space_io); + memory_listener_register(&kvm_coalesced_pio_listener, + &address_space_io); s->many_ioeventfds = kvm_check_many_ioeventfds(); @@ -1778,7 +1822,13 @@ void kvm_flush_coalesced_mmio_buffer(void) ent = &ring->coalesced_mmio[ring->first]; - cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len); + if (ent->pio == 1) { + address_space_rw(&address_space_io, ent->phys_addr, + MEMTXATTRS_UNSPECIFIED, ent->data, + ent->len, true); + } else { + cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len); + } smp_wmb(); ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX; } diff --git a/include/exec/memory.h b/include/exec/memory.h index 3a427aacf1..667466b8f3 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -419,9 +419,9 @@ struct MemoryListener { bool match_data, uint64_t data, EventNotifier *e); void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section, bool match_data, uint64_t data, EventNotifier *e); - void (*coalesced_mmio_add)(MemoryListener *listener, MemoryRegionSection *section, + void (*coalesced_io_add)(MemoryListener *listener, MemoryRegionSection *section, hwaddr addr, hwaddr len); - void (*coalesced_mmio_del)(MemoryListener *listener, MemoryRegionSection *section, + void (*coalesced_io_del)(MemoryListener *listener, MemoryRegionSection *section, hwaddr addr, hwaddr len); /* Lower = earlier 
(during add), later (during del) */ unsigned priority; diff --git a/memory.c b/memory.c index d852f1143d..51204aa079 100644 --- a/memory.c +++ b/memory.c @@ -2129,7 +2129,7 @@ static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpa .size = fr->addr.size, }; - MEMORY_LISTENER_CALL(as, coalesced_mmio_del, Reverse, &section, + MEMORY_LISTENER_CALL(as, coalesced_io_del, Reverse, &section, int128_get64(fr->addr.start), int128_get64(fr->addr.size)); QTAILQ_FOREACH(cmr, &mr->coalesced, link) { @@ -2140,7 +2140,7 @@ static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpa continue; } tmp = addrrange_intersection(tmp, fr->addr); - MEMORY_LISTENER_CALL(as, coalesced_mmio_add, Forward, &section, + MEMORY_LISTENER_CALL(as, coalesced_io_add, Forward, &section, int128_get64(tmp.start), int128_get64(tmp.size)); } From f98167ea069190f738c1e79188b4b2325ca08006 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Fri, 19 Oct 2018 03:19:13 +0800 Subject: [PATCH 08/47] target-i386: add rtc 0x70 port as coalesced_pio Signed-off-by: Peng Hao Message-Id: <1539890353-30273-1-git-send-email-peng.hao2@zte.com.cn> Signed-off-by: Paolo Bonzini --- hw/timer/mc146818rtc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index acee47da0e..e4e4de8b8a 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -34,6 +34,7 @@ #include "qapi/qapi-commands-misc.h" #include "qapi/qapi-events-misc.h" #include "qapi/visitor.h" +#include "exec/address-spaces.h" #ifdef TARGET_I386 #include "hw/i386/apic.h" @@ -70,6 +71,7 @@ typedef struct RTCState { ISADevice parent_obj; MemoryRegion io; + MemoryRegion coalesced_io; uint8_t cmos_data[128]; uint8_t cmos_index; int32_t base_year; @@ -990,6 +992,13 @@ static void rtc_realizefn(DeviceState *dev, Error **errp) memory_region_init_io(&s->io, OBJECT(s), &cmos_ops, s, "rtc", 2); isa_register_ioport(isadev, &s->io, base); + /* register rtc 0x70 port for coalesced_pio */ +
memory_region_set_flush_coalesced(&s->io); + memory_region_init_io(&s->coalesced_io, OBJECT(s), &cmos_ops, + s, "rtc-index", 1); + memory_region_add_subregion(&s->io, 0, &s->coalesced_io); + memory_region_add_coalescing(&s->coalesced_io, 0, 1); + qdev_set_legacy_instance_id(dev, base, 3); qemu_register_reset(rtc_reset, s); From 37abf8d234d41a7cda986c594c9107c9edddb7d9 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Thu, 18 Oct 2018 00:52:56 +0800 Subject: [PATCH 09/47] target-i386: add i440fx 0xcf8 port as coalesced_pio Signed-off-by: Peng Hao Message-Id: <1539795177-21038-5-git-send-email-peng.hao2@zte.com.cn> Signed-off-by: Paolo Bonzini --- hw/pci-host/piix.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 0e608347c1..da73743fa2 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -327,6 +327,10 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) sysbus_add_io(sbd, 0xcfc, &s->data_mem); sysbus_init_ioports(sbd, 0xcfc, 4); + + /* register i440fx 0xcf8 port as coalesced pio */ + memory_region_set_flush_coalesced(&s->data_mem); + memory_region_add_coalescing(&s->conf_mem, 0, 4); } static void i440fx_realize(PCIDevice *dev, Error **errp) From a8de0115008184788525a0fab5cb74368be808a4 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Thu, 18 Oct 2018 00:52:57 +0800 Subject: [PATCH 10/47] target-i386: add q35 0xcf8 port as coalesced_pio Signed-off-by: Peng Hao Message-Id: <1539795177-21038-6-git-send-email-peng.hao2@zte.com.cn> Signed-off-by: Paolo Bonzini --- hw/pci-host/q35.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 02f9576588..8ce1e09932 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -51,6 +51,10 @@ static void q35_host_realize(DeviceState *dev, Error **errp) sysbus_add_io(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, &pci->data_mem); sysbus_init_ioports(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, 4); + /* register q35 0xcf8 port as coalesced pio */ + 
memory_region_set_flush_coalesced(&pci->data_mem); + memory_region_add_coalescing(&pci->conf_mem, 0, 4); + pci->bus = pci_root_bus_new(DEVICE(s), "pcie.0", s->mch.pci_address_space, s->mch.address_space_io, From ca9759c2a92f528f256fef0e3922416f7bb47bf9 Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Thu, 18 Oct 2018 09:33:45 +0300 Subject: [PATCH 11/47] replay: don't process events at virtual clock checkpoint As QEMU becomes more multi-threaded and non-synchronized, checkpoints move from thread to thread. And the event queue that processed at checkpoints should belong to the same thread in both record and replay executions. This patch disables asynchronous event processing at virtual clock checkpoint, because it may be invoked in different threads at record and replay. This patch is temporary fix until the checkpoints are completely refactored. Signed-off-by: Pavel Dovgalyuk Message-Id: <20181018063345.7433.11678.stgit@pasha-VirtualBox> Signed-off-by: Paolo Bonzini --- replay/replay-events.c | 1 + replay/replay.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/replay/replay-events.c b/replay/replay-events.c index 0964a82838..d9a2d495b9 100644 --- a/replay/replay-events.c +++ b/replay/replay-events.c @@ -190,6 +190,7 @@ void replay_save_events(int checkpoint) { g_assert(replay_mutex_locked()); g_assert(checkpoint != CHECKPOINT_CLOCK_WARP_START); + g_assert(checkpoint != CHECKPOINT_CLOCK_VIRTUAL); while (!QTAILQ_EMPTY(&events_list)) { Event *event = QTAILQ_FIRST(&events_list); replay_save_event(event, checkpoint); diff --git a/replay/replay.c b/replay/replay.c index 379b51ab46..8b172b2d1b 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -214,7 +214,14 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint) /* This checkpoint belongs to several threads. 
Processing events from different threads is non-deterministic */ - if (checkpoint != CHECKPOINT_CLOCK_WARP_START) { + if (checkpoint != CHECKPOINT_CLOCK_WARP_START + /* FIXME: this is temporary fix, other checkpoints + may also be invoked from the different threads someday. + Asynchronous event processing should be refactored + to create additional replay event kind which is + nailed to the one of the threads and which processes + the event queue. */ + && checkpoint != CHECKPOINT_CLOCK_VIRTUAL) { replay_save_events(checkpoint); } res = true; From 6b7a98303b53b0cd94c2755b1f5c0734bbaa5347 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 9 Oct 2018 15:08:53 +0200 Subject: [PATCH 12/47] i386/kvm: add support for Hyper-V IPI send Hyper-V PV IPI support is merged to KVM, enable the feature in Qemu. When enabled, this allows Windows guests to send IPIs to other vCPUs with a single hypercall even when there are >64 vCPUs in the request. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Roman Kagan Message-Id: <20181009130853.6412-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 1 + target/i386/kvm.c | 14 +++++++++++++- 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c88876dfe3..32ea041c06 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5564,6 +5564,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), + DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 730c06f80a..caa1544b2e 100644 --- 
a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1382,6 +1382,7 @@ struct X86CPU { bool hyperv_frequencies; bool hyperv_reenlightenment; bool hyperv_tlbflush; + bool hyperv_ipi; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index d6d5a79293..87f36d14e8 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -58,6 +58,7 @@ #define HV_APIC_ACCESS_RECOMMENDED (1u << 3) #define HV_SYSTEM_RESET_RECOMMENDED (1u << 4) #define HV_RELAXED_TIMING_RECOMMENDED (1u << 5) +#define HV_CLUSTER_IPI_RECOMMENDED (1u << 10) #define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) /* diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 302f420064..4e62b5c39b 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -608,7 +608,8 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_synic || cpu->hyperv_stimer || cpu->hyperv_reenlightenment || - cpu->hyperv_tlbflush); + cpu->hyperv_tlbflush || + cpu->hyperv_ipi); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -888,6 +889,17 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; } + if (cpu->hyperv_ipi) { + if (kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_SEND_IPI) <= 0) { + fprintf(stderr, "Hyper-V IPI send support " + "(requested by 'hv-ipi' cpu flag) " + " is not supported by kernel\n"); + return -ENOSYS; + } + c->eax |= HV_CLUSTER_IPI_RECOMMENDED; + c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; + } c->ebx = cpu->hyperv_spinlock_attempts; From b4e1af8961bf9b0d415abdf3e4908168daea6059 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Thu, 18 Oct 2018 16:44:01 +0300 Subject: [PATCH 13/47] i386: hvf: Fix register refs if REX is present According to Intel(R)64 and IA-32 Architectures Software Developer's Manual, the following one-byte registers should be fetched when REX prefix is present (sorted by reg encoding index): AL, CL, DL, BL, SPL, BPL, SIL, DIL, 
R8L - R15L The first 8 are fetched if REX.R is zero, the last 8 if non-zero. The following registers should be fetched for instructions without REX prefix (also sorted by reg encoding index): AL, CL, DL, BL, AH, CH, DH, BH Current emulation code doesn't handle accesses to SPL, BPL, SIL, DIL when REX is present, therefore an instruction 40883e "mov %dil,(%rsi)" is decoded as "mov %bh,(%rsi)". That caused an infinite loop in vp_reset: https://lists.gnu.org/archive/html/qemu-devel/2018-10/msg03293.html Signed-off-by: Roman Bolshakov Message-Id: <20181018134401.44471-1-r.bolshakov@yadro.com> Signed-off-by: Paolo Bonzini --- target/i386/hvf/x86_decode.c | 67 ++++++++++++++++++++---------------- target/i386/hvf/x86_decode.h | 6 ++-- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c index 2d7540fe7c..2e33b69541 100644 --- a/target/i386/hvf/x86_decode.c +++ b/target/i386/hvf/x86_decode.c @@ -113,7 +113,8 @@ static void decode_modrm_reg(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = decode->modrm.reg; - op->ptr = get_reg_ref(env, op->reg, decode->rex.r, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r, + decode->operand_size); } static void decode_rax(CPUX86State *env, struct x86_decode *decode, @@ -121,7 +122,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_EAX; - op->ptr = get_reg_ref(env, op->reg, 0, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, 0, + decode->operand_size); } static inline void decode_immediate(CPUX86State *env, struct x86_decode *decode, @@ -263,16 +265,16 @@ static void decode_incgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x40; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); +
decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_decgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x48; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_incgroup2(CPUX86State *env, struct x86_decode *decode) @@ -288,16 +290,16 @@ static void decode_pushgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x50; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_popgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x58; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_jxx(CPUX86State *env, struct x86_decode *decode) @@ -378,16 +380,16 @@ static void decode_xchgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x90; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_movgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, 
decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -402,8 +404,8 @@ static void decode_movgroup8(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb0; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -412,7 +414,8 @@ static void decode_rcx(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_ECX; - op->ptr = get_reg_ref(env, op->reg, decode->rex.b, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b, + decode->operand_size); } struct decode_tbl { @@ -639,8 +642,8 @@ static void decode_bswap(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[1] - 0xc8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_d9_4(CPUX86State *env, struct x86_decode *decode) @@ -1686,7 +1689,8 @@ calc_addr: } } -target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended, + int size) { target_ulong ptr = 0; int which = 0; @@ -1698,7 +1702,7 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) switch (size) { case 1: - if (is_extended || reg < 4) { + if (is_extended || reg < 4 || rex) { which = 1; ptr = (target_ulong)&RL(env, reg); } else { @@ -1714,10 +1718,11 @@ 
target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) return ptr; } -target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size) +target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended, + int size) { target_ulong val = 0; - memcpy(&val, (void *)get_reg_ref(env, reg, is_extended, size), size); + memcpy(&val, (void *)get_reg_ref(env, reg, rex, is_extended, size), size); return val; } @@ -1739,7 +1744,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode, if (base_reg == R_ESP || base_reg == R_EBP) { *sel = R_SS; } - base = get_reg_val(env, decode->sib.base, decode->rex.b, addr_size); + base = get_reg_val(env, decode->sib.base, decode->rex.rex, + decode->rex.b, addr_size); } if (decode->rex.x) { @@ -1747,7 +1753,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode, } if (index_reg != R_ESP) { - scaled_index = get_reg_val(env, index_reg, decode->rex.x, addr_size) << + scaled_index = get_reg_val(env, index_reg, decode->rex.rex, + decode->rex.x, addr_size) << decode->sib.scale; } return base + scaled_index; @@ -1776,7 +1783,8 @@ void calc_modrm_operand32(CPUX86State *env, struct x86_decode *decode, if (decode->modrm.rm == R_EBP || decode->modrm.rm == R_ESP) { seg = R_SS; } - ptr += get_reg_val(env, decode->modrm.rm, decode->rex.b, addr_size); + ptr += get_reg_val(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, addr_size); } if (X86_DECODE_CMD_LEA == decode->cmd) { @@ -1805,7 +1813,8 @@ void calc_modrm_operand64(CPUX86State *env, struct x86_decode *decode, } else if (0 == mod && 5 == rm) { ptr = RIP(env) + decode->len + (int32_t) offset; } else { - ptr = get_reg_val(env, src, decode->rex.b, 8) + (int64_t) offset; + ptr = get_reg_val(env, src, decode->rex.rex, decode->rex.b, 8) + + (int64_t) offset; } if (X86_DECODE_CMD_LEA == decode->cmd) { @@ -1822,8 +1831,8 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, if (3 == 
decode->modrm.mod) { op->reg = decode->modrm.reg; op->type = X86_VAR_REG; - op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.b, - decode->operand_size); + op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, decode->operand_size); return; } diff --git a/target/i386/hvf/x86_decode.h b/target/i386/hvf/x86_decode.h index 5ab6f31fa5..ef4bcab310 100644 --- a/target/i386/hvf/x86_decode.h +++ b/target/i386/hvf/x86_decode.h @@ -303,8 +303,10 @@ uint64_t sign(uint64_t val, int size); uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode); -target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size); -target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size); +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended, + int size); +target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended, + int size); void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, struct x86_decode_op *op); target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode, From 92cc3aaa1fb4165a6236a2265c6be4ea531d0988 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Thu, 18 Oct 2018 17:30:51 +0300 Subject: [PATCH 14/47] i386: hvf: Remove hvf_disabled accel_init_machine sets *(acc->allowed) to true if acc->init_machine(ms) succeeds. There's no need to have both hvf_allowed and hvf_disabled. 
Signed-off-by: Roman Bolshakov Message-Id: <20181018143051.48508-1-r.bolshakov@yadro.com> Signed-off-by: Paolo Bonzini --- include/sysemu/hvf.h | 4 ++-- target/i386/hvf/hvf.c | 9 +-------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h index 241118845c..aaa51d2c51 100644 --- a/include/sysemu/hvf.h +++ b/include/sysemu/hvf.h @@ -17,7 +17,7 @@ #include "exec/memory.h" #include "sysemu/accel.h" -extern int hvf_disabled; +extern bool hvf_allowed; #ifdef CONFIG_HVF #include #include @@ -26,7 +26,7 @@ extern int hvf_disabled; #include "hw/hw.h" uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, int reg); -#define hvf_enabled() !hvf_disabled +#define hvf_enabled() (hvf_allowed) #else #define hvf_enabled() 0 #define hvf_get_supported_cpuid(func, idx, reg) 0 diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 9f52bc413a..e193022c03 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -73,7 +73,6 @@ #include "target/i386/cpu.h" HVFState *hvf_state; -int hvf_disabled = 1; static void assert_hvf_ok(hv_return_t ret) { @@ -604,11 +603,6 @@ int hvf_init_vcpu(CPUState *cpu) return 0; } -void hvf_disable(int shouldDisable) -{ - hvf_disabled = shouldDisable; -} - static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -934,7 +928,7 @@ int hvf_vcpu_exec(CPUState *cpu) return ret; } -static bool hvf_allowed; +bool hvf_allowed; static int hvf_accel_init(MachineState *ms) { @@ -942,7 +936,6 @@ static int hvf_accel_init(MachineState *ms) hv_return_t ret; HVFState *s; - hvf_disable(0); ret = hv_vm_create(HV_VM_DEFAULT); assert_hvf_ok(ret); From 238d1240d921e6624f4c6d2f063b5c1750d55210 Mon Sep 17 00:00:00 2001 From: Artem Pisarenko Date: Thu, 18 Oct 2018 13:12:52 +0600 Subject: [PATCH 15/47] vl: improve/fix documentation related to RTC function Documentation describing -rtc option updated to better match current implementation and 
highlight some important specifics. Signed-off-by: Artem Pisarenko Message-Id: <1b245c6c0803d4bf11dcbf9eb32f34af8c2bd0b4.1539846575.git.artem.k.pisarenko@gmail.com> Signed-off-by: Paolo Bonzini --- qemu-options.hx | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/qemu-options.hx b/qemu-options.hx index f139459e80..829ed81e35 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3458,25 +3458,29 @@ HXCOMM Silently ignored for compatibility DEF("clock", HAS_ARG, QEMU_OPTION_clock, "", QEMU_ARCH_ALL) DEF("rtc", HAS_ARG, QEMU_OPTION_rtc, \ - "-rtc [base=utc|localtime|date][,clock=host|rt|vm][,driftfix=none|slew]\n" \ + "-rtc [base=utc|localtime|<datetime>][,clock=host|rt|vm][,driftfix=none|slew]\n" \ " set the RTC base and clock, enable drift fix for clock ticks (x86 only)\n", QEMU_ARCH_ALL) STEXI -@item -rtc [base=utc|localtime|@var{date}][,clock=host|vm][,driftfix=none|slew] +@item -rtc [base=utc|localtime|@var{datetime}][,clock=host|rt|vm][,driftfix=none|slew] @findex -rtc Specify @option{base} as @code{utc} or @code{localtime} to let the RTC start at the current UTC or local time, respectively. @code{localtime} is required for correct date in -MS-DOS or Windows. To start at a specific point in time, provide @var{date} in the +MS-DOS or Windows. To start at a specific point in time, provide @var{datetime} in the format @code{2006-06-17T16:01:21} or @code{2006-06-17}. The default base is UTC. By default the RTC is driven by the host system time. This allows using of the RTC as accurate reference clock inside the guest, specifically if the host time is smoothly following an accurate external reference clock, e.g. via NTP. If you want to isolate the guest time from the host, you can set @option{clock} -to @code{rt} instead. To even prevent it from progressing during suspension, -you can set it to @code{vm}. +to @code{rt} instead, which provides a host monotonic clock if host support it.
+To even prevent the RTC from progressing during suspension, you can set @option{clock} +to @code{vm} (virtual clock). @samp{clock=vm} is recommended especially in +icount mode in order to preserve determinism; however, note that in icount mode +the speed of the virtual clock is variable and can in general differ from the +host clock. Enable @option{driftfix} (i386 targets only) if you experience time drift problems, specifically with Windows' ACPI HAL. This option will try to figure out how From 7e166ebd8c7bade39002b0a81a1795775af21dde Mon Sep 17 00:00:00 2001 From: Artem Pisarenko Date: Thu, 18 Oct 2018 13:12:53 +0600 Subject: [PATCH 16/47] vl: refactor -rtc option references Improve code readability and prepare for fixing bug #1797033 Signed-off-by: Artem Pisarenko Message-Id: <9330a48899f997431a34460014886d118a7c0960.1539846575.git.artem.k.pisarenko@gmail.com> Signed-off-by: Paolo Bonzini --- vl.c | 82 ++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 32 deletions(-) diff --git a/vl.c b/vl.c index 4e25c78bff..d15e086140 100644 --- a/vl.c +++ b/vl.c @@ -147,8 +147,13 @@ bool enable_cpu_pm = false; int nb_nics; NICInfo nd_table[MAX_NICS]; int autostart; -static int rtc_utc = 1; -static int rtc_date_offset = -1; /* -1 means no change */ +static enum { + RTC_BASE_UTC, + RTC_BASE_LOCALTIME, + RTC_BASE_DATETIME, +} rtc_base_type = RTC_BASE_UTC; +static int rtc_host_datetime_offset = -1; /* valid only for host rtc_clock and + rtc_base_type=RTC_BASE_DATETIME */ QEMUClockType rtc_clock; int vga_interface_type = VGA_NONE; static DisplayOptions dpy; @@ -782,26 +787,30 @@ void qemu_system_vmstop_request(RunState state) /***********************************************************/ /* real time host monotonic timer */ -static time_t qemu_time(void) +static time_t qemu_timedate(void) { return qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000; } /***********************************************************/ -/* host time/date access */ +/* 
RTC reference time/date access */ void qemu_get_timedate(struct tm *tm, int offset) { - time_t ti = qemu_time(); + time_t ti = qemu_timedate(); ti += offset; - if (rtc_date_offset == -1) { - if (rtc_utc) - gmtime_r(&ti, tm); - else - localtime_r(&ti, tm); - } else { - ti -= rtc_date_offset; + + switch (rtc_base_type) { + case RTC_BASE_UTC: gmtime_r(&ti, tm); + break; + case RTC_BASE_LOCALTIME: + localtime_r(&ti, tm); + break; + case RTC_BASE_DATETIME: + ti -= rtc_host_datetime_offset; + gmtime_r(&ti, tm); + break; } } @@ -809,23 +818,30 @@ int qemu_timedate_diff(struct tm *tm) { time_t seconds; - if (rtc_date_offset == -1) - if (rtc_utc) - seconds = mktimegm(tm); - else { - struct tm tmp = *tm; - tmp.tm_isdst = -1; /* use timezone to figure it out */ - seconds = mktime(&tmp); - } - else - seconds = mktimegm(tm) + rtc_date_offset; + switch (rtc_base_type) { + case RTC_BASE_UTC: + seconds = mktimegm(tm); + break; + case RTC_BASE_LOCALTIME: + { + struct tm tmp = *tm; + tmp.tm_isdst = -1; /* use timezone to figure it out */ + seconds = mktime(&tmp); + break; + } + case RTC_BASE_DATETIME: + seconds = mktimegm(tm) + rtc_host_datetime_offset; + break; + default: + abort(); + } - return seconds - qemu_time(); + return seconds - qemu_timedate(); } -static void configure_rtc_date_offset(const char *startdate) +static void configure_rtc_host_datetime_offset(const char *startdate) { - time_t rtc_start_date; + time_t rtc_start_datetime; struct tm tm; if (sscanf(startdate, "%d-%d-%dT%d:%d:%d", &tm.tm_year, &tm.tm_mon, @@ -841,15 +857,16 @@ static void configure_rtc_date_offset(const char *startdate) } tm.tm_year -= 1900; tm.tm_mon--; - rtc_start_date = mktimegm(&tm); - if (rtc_start_date == -1) { + rtc_start_datetime = mktimegm(&tm); + if (rtc_start_datetime == -1) { date_fail: - error_report("invalid date format"); + error_report("invalid datetime format"); error_printf("valid formats: " "'2006-06-17T16:01:21' or '2006-06-17'\n"); exit(1); } - rtc_date_offset = qemu_time() - 
rtc_start_date; + rtc_host_datetime_offset = (qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000) + - rtc_start_datetime; } static void configure_rtc(QemuOpts *opts) @@ -859,15 +876,16 @@ static void configure_rtc(QemuOpts *opts) value = qemu_opt_get(opts, "base"); if (value) { if (!strcmp(value, "utc")) { - rtc_utc = 1; + rtc_base_type = RTC_BASE_UTC; } else if (!strcmp(value, "localtime")) { Error *blocker = NULL; - rtc_utc = 0; + rtc_base_type = RTC_BASE_LOCALTIME; error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "-rtc base=localtime"); replay_add_blocker(blocker); } else { - configure_rtc_date_offset(value); + rtc_base_type = RTC_BASE_DATETIME; + configure_rtc_host_datetime_offset(value); } } value = qemu_opt_get(opts, "clock"); From eb6a52099160f1a6e66d7ee042bbf7fb5dba67d6 Mon Sep 17 00:00:00 2001 From: Artem Pisarenko Date: Thu, 18 Oct 2018 13:12:54 +0600 Subject: [PATCH 17/47] Fixes RTC bug with base datetime shifts in clock=vm This makes all current "-rtc" option parameters combinations produce fixed/unambiguous RTC timedate reference for hardware emulation frontends. It restores determinism of guest execution when used with clock=vm and specified base value. 
Buglink: https://bugs.launchpad.net/qemu/+bug/1797033 Signed-off-by: Artem Pisarenko Message-Id: <1d963c3e013dfedafa1f6edb9fb219b7e49e39da.1539846575.git.artem.k.pisarenko@gmail.com> Signed-off-by: Paolo Bonzini --- vl.c | 62 ++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/vl.c b/vl.c index d15e086140..5bb53f7204 100644 --- a/vl.c +++ b/vl.c @@ -152,8 +152,10 @@ static enum { RTC_BASE_LOCALTIME, RTC_BASE_DATETIME, } rtc_base_type = RTC_BASE_UTC; -static int rtc_host_datetime_offset = -1; /* valid only for host rtc_clock and - rtc_base_type=RTC_BASE_DATETIME */ +static time_t rtc_ref_start_datetime; +static int rtc_realtime_clock_offset; /* used only with QEMU_CLOCK_REALTIME */ +static int rtc_host_datetime_offset = -1; /* valid & used only with + RTC_BASE_DATETIME */ QEMUClockType rtc_clock; int vga_interface_type = VGA_NONE; static DisplayOptions dpy; @@ -247,6 +249,7 @@ static struct { static QemuOptsList qemu_rtc_opts = { .name = "rtc", .head = QTAILQ_HEAD_INITIALIZER(qemu_rtc_opts.head), + .merge_lists = true, .desc = { { .name = "base", @@ -785,32 +788,42 @@ void qemu_system_vmstop_request(RunState state) } /***********************************************************/ -/* real time host monotonic timer */ - -static time_t qemu_timedate(void) +/* RTC reference time/date access */ +static time_t qemu_ref_timedate(void) { - return qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000; + time_t value = qemu_clock_get_ms(rtc_clock) / 1000; + switch (rtc_clock) { + case QEMU_CLOCK_REALTIME: + value -= rtc_realtime_clock_offset; + /* no break */ + case QEMU_CLOCK_VIRTUAL: + value += rtc_ref_start_datetime; + break; + case QEMU_CLOCK_HOST: + if (rtc_base_type == RTC_BASE_DATETIME) { + value -= rtc_host_datetime_offset; + } + break; + default: + assert(0); + } + return value; } -/***********************************************************/ -/* RTC reference time/date access */ void qemu_get_timedate(struct 
tm *tm, int offset) { - time_t ti = qemu_timedate(); + time_t ti = qemu_ref_timedate(); ti += offset; switch (rtc_base_type) { + case RTC_BASE_DATETIME: case RTC_BASE_UTC: gmtime_r(&ti, tm); break; case RTC_BASE_LOCALTIME: localtime_r(&ti, tm); break; - case RTC_BASE_DATETIME: - ti -= rtc_host_datetime_offset; - gmtime_r(&ti, tm); - break; } } @@ -819,6 +832,7 @@ int qemu_timedate_diff(struct tm *tm) time_t seconds; switch (rtc_base_type) { + case RTC_BASE_DATETIME: case RTC_BASE_UTC: seconds = mktimegm(tm); break; @@ -829,17 +843,14 @@ int qemu_timedate_diff(struct tm *tm) seconds = mktime(&tmp); break; } - case RTC_BASE_DATETIME: - seconds = mktimegm(tm) + rtc_host_datetime_offset; - break; default: abort(); } - return seconds - qemu_timedate(); + return seconds - qemu_ref_timedate(); } -static void configure_rtc_host_datetime_offset(const char *startdate) +static void configure_rtc_base_datetime(const char *startdate) { time_t rtc_start_datetime; struct tm tm; @@ -865,14 +876,19 @@ static void configure_rtc_host_datetime_offset(const char *startdate) "'2006-06-17T16:01:21' or '2006-06-17'\n"); exit(1); } - rtc_host_datetime_offset = (qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000) - - rtc_start_datetime; + rtc_host_datetime_offset = rtc_ref_start_datetime - rtc_start_datetime; + rtc_ref_start_datetime = rtc_start_datetime; } static void configure_rtc(QemuOpts *opts) { const char *value; + /* Set defaults */ + rtc_clock = QEMU_CLOCK_HOST; + rtc_ref_start_datetime = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000; + rtc_realtime_clock_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; + value = qemu_opt_get(opts, "base"); if (value) { if (!strcmp(value, "utc")) { @@ -885,7 +901,7 @@ static void configure_rtc(QemuOpts *opts) replay_add_blocker(blocker); } else { rtc_base_type = RTC_BASE_DATETIME; - configure_rtc_host_datetime_offset(value); + configure_rtc_base_datetime(value); } } value = qemu_opt_get(opts, "clock"); @@ -3035,7 +3051,6 @@ int main(int argc, char **argv, 
char **envp) error_reportf_err(err, "cannot initialize crypto: "); exit(1); } - rtc_clock = QEMU_CLOCK_HOST; QLIST_INIT (&vm_change_state_head); os_setup_early_signal_handling(); @@ -3755,7 +3770,6 @@ int main(int argc, char **argv, char **envp) if (!opts) { exit(1); } - configure_rtc(opts); break; case QEMU_OPTION_tb_size: #ifndef CONFIG_TCG @@ -3973,6 +3987,8 @@ int main(int argc, char **argv, char **envp) exit(EXIT_FAILURE); } + configure_rtc(qemu_find_opts_singleton("rtc")); + machine_class = select_machine(); set_memory_options(&ram_slots, &maxram_size, machine_class); From ac0989f53dd08ba7831130396f794b3e3bdf2d1a Mon Sep 17 00:00:00 2001 From: Artem Pisarenko Date: Thu, 18 Oct 2018 13:12:55 +0600 Subject: [PATCH 18/47] vl, qapi: offset calculation in RTC_CHANGE event reverted Return value of qemu_timedate_diff(), used for calculation offset in QAPI 'RTC_CHANGE' event, restored to keep compatibility. Since it wasn't documented that difference is relative to host clock advancement, this change also adds important note to 'RTC_CHANGE' event description to highlight established implementation specifics. Signed-off-by: Artem Pisarenko Message-Id: <1fc12c77e8b7115d3842919a8b586d9cbe4efca6.1539846575.git.artem.k.pisarenko@gmail.com> Signed-off-by: Paolo Bonzini --- qapi/misc.json | 3 ++- vl.c | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/qapi/misc.json b/qapi/misc.json index 3a68af9ca3..c85c6c8ca3 100644 --- a/qapi/misc.json +++ b/qapi/misc.json @@ -3070,7 +3070,8 @@ # Emitted when the guest changes the RTC time. # # @offset: offset between base RTC clock (as specified by -rtc base), and -# new RTC clock value +# new RTC clock value. Note that value will be different depending +# on clock chosen to drive RTC (specified by -rtc clock). # # Note: This event is rate-limited. 
# diff --git a/vl.c b/vl.c index 5bb53f7204..c5fffa349b 100644 --- a/vl.c +++ b/vl.c @@ -789,10 +789,10 @@ void qemu_system_vmstop_request(RunState state) /***********************************************************/ /* RTC reference time/date access */ -static time_t qemu_ref_timedate(void) +static time_t qemu_ref_timedate(QEMUClockType clock) { - time_t value = qemu_clock_get_ms(rtc_clock) / 1000; - switch (rtc_clock) { + time_t value = qemu_clock_get_ms(clock) / 1000; + switch (clock) { case QEMU_CLOCK_REALTIME: value -= rtc_realtime_clock_offset; /* no break */ @@ -812,7 +812,7 @@ static time_t qemu_ref_timedate(void) void qemu_get_timedate(struct tm *tm, int offset) { - time_t ti = qemu_ref_timedate(); + time_t ti = qemu_ref_timedate(rtc_clock); ti += offset; @@ -847,7 +847,7 @@ int qemu_timedate_diff(struct tm *tm) abort(); } - return seconds - qemu_ref_timedate(); + return seconds - qemu_ref_timedate(QEMU_CLOCK_HOST); } static void configure_rtc_base_datetime(const char *startdate) From 8b5e6caf01f778a257721cae499392e8f1d55ddb Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 16 Oct 2018 15:33:40 +0200 Subject: [PATCH 19/47] call HotplugHandler->plug() as the last step in device realization When [2] was fixed it was agreed that adding and calling post_plug() callback after device_reset() was low risk approach to hotfix issue right before release. So it was merged instead of moving already existing plug() callback after device_reset() is called which would be more risky and require all plug() callbacks audit. Looking at the current plug() callbacks, it doesn't seem that moving plug() callback after device_reset() is breaking anything, so here goes agreed upon [3] proper fix which essentially reverts [1][2] and moves plug() callback after device_reset(). This way devices always comes to plug() stage, after it's been fully initialized (including being reset), which fixes race condition [2] without need for an extra post_plug() callback. 1. 
(25e897881 "qdev: add HotplugHandler->post_plug() callback") 2. (8449bcf94 "virtio-scsi: fix hotplug ->reset() vs event race") 3. https://www.mail-archive.com/qemu-devel@nongnu.org/msg549915.html Signed-off-by: Igor Mammedov Message-Id: <1539696820-273275-1-git-send-email-imammedo@redhat.com> Reviewed-by: Stefan Hajnoczi Tested-by: Pierre Morel Acked-by: Pierre Morel Signed-off-by: Paolo Bonzini --- hw/core/hotplug.c | 10 ---------- hw/core/qdev.c | 16 ++++++---------- hw/scsi/virtio-scsi.c | 11 +---------- include/hw/hotplug.h | 11 ----------- 4 files changed, 7 insertions(+), 41 deletions(-) diff --git a/hw/core/hotplug.c b/hw/core/hotplug.c index 2253072d0e..17ac986685 100644 --- a/hw/core/hotplug.c +++ b/hw/core/hotplug.c @@ -35,16 +35,6 @@ void hotplug_handler_plug(HotplugHandler *plug_handler, } } -void hotplug_handler_post_plug(HotplugHandler *plug_handler, - DeviceState *plugged_dev) -{ - HotplugHandlerClass *hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler); - - if (hdc->post_plug) { - hdc->post_plug(plug_handler, plugged_dev); - } -} - void hotplug_handler_unplug_request(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp) diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 046d8f1f76..6b3cc55b27 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -832,14 +832,6 @@ static void device_set_realized(Object *obj, bool value, Error **errp) DEVICE_LISTENER_CALL(realize, Forward, dev); - if (hotplug_ctrl) { - hotplug_handler_plug(hotplug_ctrl, dev, &local_err); - } - - if (local_err != NULL) { - goto post_realize_fail; - } - /* * always free/re-initialize here since the value cannot be cleaned up * in device_unrealize due to its usage later on in the unplug path @@ -869,8 +861,12 @@ static void device_set_realized(Object *obj, bool value, Error **errp) dev->pending_deleted_event = false; if (hotplug_ctrl) { - hotplug_handler_post_plug(hotplug_ctrl, dev); - } + hotplug_handler_plug(hotplug_ctrl, dev, &local_err); + if (local_err != NULL) { + goto 
child_realize_fail; + } + } + } else if (!value && dev->realized) { Error **local_errp = NULL; QLIST_FOREACH(bus, &dev->child_bus, sibling) { diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 5a3057d1f8..3aa99717e2 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -797,16 +797,8 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, virtio_scsi_acquire(s); blk_set_aio_context(sd->conf.blk, s->ctx); virtio_scsi_release(s); - } -} -/* Announce the new device after it has been plugged */ -static void virtio_scsi_post_hotplug(HotplugHandler *hotplug_dev, - DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); - VirtIOSCSI *s = VIRTIO_SCSI(vdev); - SCSIDevice *sd = SCSI_DEVICE(dev); + } if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { virtio_scsi_acquire(s); @@ -976,7 +968,6 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data) vdc->start_ioeventfd = virtio_scsi_dataplane_start; vdc->stop_ioeventfd = virtio_scsi_dataplane_stop; hc->plug = virtio_scsi_hotplug; - hc->post_plug = virtio_scsi_post_hotplug; hc->unplug = virtio_scsi_hotunplug; } diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h index 51541d63e1..1a0516a479 100644 --- a/include/hw/hotplug.h +++ b/include/hw/hotplug.h @@ -47,8 +47,6 @@ typedef void (*hotplug_fn)(HotplugHandler *plug_handler, * @parent: Opaque parent interface. * @pre_plug: pre plug callback called at start of device.realize(true) * @plug: plug callback called at end of device.realize(true). - * @post_plug: post plug callback called after device.realize(true) and device - * reset * @unplug_request: unplug request callback. * Used as a means to initiate device unplug for devices that * require asynchronous unplug handling. 
@@ -63,7 +61,6 @@ typedef struct HotplugHandlerClass { /* */ hotplug_fn pre_plug; hotplug_fn plug; - void (*post_plug)(HotplugHandler *plug_handler, DeviceState *plugged_dev); hotplug_fn unplug_request; hotplug_fn unplug; } HotplugHandlerClass; @@ -86,14 +83,6 @@ void hotplug_handler_pre_plug(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp); -/** - * hotplug_handler_post_plug: - * - * Call #HotplugHandlerClass.post_plug callback of @plug_handler. - */ -void hotplug_handler_post_plug(HotplugHandler *plug_handler, - DeviceState *plugged_dev); - /** * hotplug_handler_unplug_request: * From a519e3894436501c5c6da5e82f6b867d2c05afb5 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Fri, 12 Oct 2018 22:51:03 -0700 Subject: [PATCH 20/47] hw: edu: drop DO_UPCAST Signed-off-by: Li Qiang Signed-off-by: Paolo Bonzini --- hw/misc/edu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/misc/edu.c b/hw/misc/edu.c index 0687ffd343..cdcf550dd7 100644 --- a/hw/misc/edu.c +++ b/hw/misc/edu.c @@ -342,7 +342,7 @@ static void *edu_fact_thread(void *opaque) static void pci_edu_realize(PCIDevice *pdev, Error **errp) { - EduState *edu = DO_UPCAST(EduState, pdev, pdev); + EduState *edu = EDU(pdev); uint8_t *pci_conf = pdev->config; pci_config_set_interrupt_pin(pci_conf, 1); @@ -365,7 +365,7 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp) static void pci_edu_uninit(PCIDevice *pdev) { - EduState *edu = DO_UPCAST(EduState, pdev, pdev); + EduState *edu = EDU(pdev); qemu_mutex_lock(&edu->thr_mutex); edu->stopping = true; From e6aa5ba4ac423dd282c831402110970ee278392e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 13 Oct 2018 11:49:16 +0200 Subject: [PATCH 21/47] scsi-disk: fix double completion of failing passthrough requests If a command fails with a sense that scsi_sense_buf_to_errno converts to ECANCELED/EAGAIN/ENOTCONN or with a unit attention, scsi_req_complete is called twice. This caused a crash. 
Reported-by: Wangguang Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-disk.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index c43163cef4..4074d7c2bf 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -441,9 +441,18 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) } switch (error) { case 0: - /* The command has run, no need to fake sense. */ + /* A passthrough command has run and has produced sense data; check + * whether the error has to be handled by the guest or should rather + * pause the host. + */ assert(r->status && *r->status); - scsi_req_complete(&r->req, *r->status); + error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); + if (error == ECANCELED || error == EAGAIN || error == ENOTCONN || + error == 0) { + /* These errors are handled by guest. */ + scsi_req_complete(&r->req, *r->status); + return true; + } break; case ENOMEDIUM: scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); @@ -462,17 +471,6 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) break; } } - if (!error) { - assert(r->status && *r->status); - error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); - - if (error == ECANCELED || error == EAGAIN || error == ENOTCONN || - error == 0) { - /* These errors are handled by guest. */ - scsi_req_complete(&r->req, *r->status); - return true; - } - } blk_error_action(s->qdev.conf.blk, action, is_read, error); if (action == BLOCK_ERROR_ACTION_STOP) { From 40dce4ee61c68395f6d463fae792f61b7c003bce Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 13 Oct 2018 11:52:34 +0200 Subject: [PATCH 22/47] scsi-disk: fix rerror/werror=ignore rerror=ignore was returning true from scsi_handle_rw_error but the callers were not calling scsi_req_complete when rerror=ignore returns true (this is the correct thing to do when true is returned after executing a passthrough command). 
Fix this by calling it in scsi_handle_rw_error. Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-disk.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 4074d7c2bf..e2c5408aa2 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -473,10 +473,15 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) } blk_error_action(s->qdev.conf.blk, action, is_read, error); + if (action == BLOCK_ERROR_ACTION_IGNORE) { + scsi_req_complete(&r->req, 0); + return true; + } + if (action == BLOCK_ERROR_ACTION_STOP) { scsi_req_retry(&r->req); } - return action != BLOCK_ERROR_ACTION_IGNORE; + return false; } static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) From 1ba5c3a95443a4c52baaf5339fda1b88c09f3203 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:18:29 +0300 Subject: [PATCH 23/47] hyperv_testdev: refactor for better maintainability Make hyperv_testdev slightly easier to follow and enhance in future. For that, put the hyperv sint routes (wrapped in a helper structure) on a linked list rather than a fixed-size array. Besides, this way HvSintRoute can be treated as an opaque structure, allowing for easier refactoring of the core Hyper-V SynIC code in follow-up patches. 
Signed-off-by: Roman Kagan Message-Id: <20180921081836.29230-2-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/misc/hyperv_testdev.c | 120 +++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 63 deletions(-) diff --git a/hw/misc/hyperv_testdev.c b/hw/misc/hyperv_testdev.c index 7549f470b1..915d74e177 100644 --- a/hw/misc/hyperv_testdev.c +++ b/hw/misc/hyperv_testdev.c @@ -12,6 +12,7 @@ */ #include "qemu/osdep.h" +#include "qemu/queue.h" #include #include "hw/hw.h" #include "hw/qdev.h" @@ -20,12 +21,17 @@ #include "target/i386/hyperv.h" #include "kvm_i386.h" -#define HV_TEST_DEV_MAX_SINT_ROUTES 64 +typedef struct TestSintRoute { + QLIST_ENTRY(TestSintRoute) le; + uint8_t vp_index; + uint8_t sint; + HvSintRoute *sint_route; +} TestSintRoute; struct HypervTestDev { ISADevice parent_obj; MemoryRegion sint_control; - HvSintRoute *sint_route[HV_TEST_DEV_MAX_SINT_ROUTES]; + QLIST_HEAD(, TestSintRoute) sint_routes; }; typedef struct HypervTestDev HypervTestDev; @@ -39,70 +45,56 @@ enum { HV_TEST_DEV_SINT_ROUTE_SET_SINT }; -static int alloc_sint_route_index(HypervTestDev *dev) +static void sint_route_create(HypervTestDev *dev, + uint8_t vp_index, uint8_t sint) { - int i; + TestSintRoute *sint_route; - for (i = 0; i < ARRAY_SIZE(dev->sint_route); i++) { - if (dev->sint_route[i] == NULL) { - return i; + sint_route = g_new0(TestSintRoute, 1); + assert(sint_route); + + sint_route->vp_index = vp_index; + sint_route->sint = sint; + + sint_route->sint_route = kvm_hv_sint_route_create(vp_index, sint, NULL); + assert(sint_route->sint_route); + + QLIST_INSERT_HEAD(&dev->sint_routes, sint_route, le); +} + +static TestSintRoute *sint_route_find(HypervTestDev *dev, + uint8_t vp_index, uint8_t sint) +{ + TestSintRoute *sint_route; + + QLIST_FOREACH(sint_route, &dev->sint_routes, le) { + if (sint_route->vp_index == vp_index && sint_route->sint == sint) { + return sint_route; } } - return -1; + assert(false); + return NULL; } -static void 
free_sint_route_index(HypervTestDev *dev, int i) +static void sint_route_destroy(HypervTestDev *dev, + uint8_t vp_index, uint8_t sint) { - assert(i >= 0 && i < ARRAY_SIZE(dev->sint_route)); - dev->sint_route[i] = NULL; + TestSintRoute *sint_route; + + sint_route = sint_route_find(dev, vp_index, sint); + QLIST_REMOVE(sint_route, le); + kvm_hv_sint_route_destroy(sint_route->sint_route); + g_free(sint_route); } -static int find_sint_route_index(HypervTestDev *dev, uint32_t vp_index, - uint32_t sint) +static void sint_route_set_sint(HypervTestDev *dev, + uint8_t vp_index, uint8_t sint) { - HvSintRoute *sint_route; - int i; + TestSintRoute *sint_route; - for (i = 0; i < ARRAY_SIZE(dev->sint_route); i++) { - sint_route = dev->sint_route[i]; - if (sint_route && sint_route->vp_index == vp_index && - sint_route->sint == sint) { - return i; - } - } - return -1; -} + sint_route = sint_route_find(dev, vp_index, sint); -static void hv_synic_test_dev_control(HypervTestDev *dev, uint32_t ctl, - uint32_t vp_index, uint32_t sint) -{ - int i; - HvSintRoute *sint_route; - - switch (ctl) { - case HV_TEST_DEV_SINT_ROUTE_CREATE: - i = alloc_sint_route_index(dev); - assert(i >= 0); - sint_route = kvm_hv_sint_route_create(vp_index, sint, NULL); - assert(sint_route); - dev->sint_route[i] = sint_route; - break; - case HV_TEST_DEV_SINT_ROUTE_DESTROY: - i = find_sint_route_index(dev, vp_index, sint); - assert(i >= 0); - sint_route = dev->sint_route[i]; - kvm_hv_sint_route_destroy(sint_route); - free_sint_route_index(dev, i); - break; - case HV_TEST_DEV_SINT_ROUTE_SET_SINT: - i = find_sint_route_index(dev, vp_index, sint); - assert(i >= 0); - sint_route = dev->sint_route[i]; - kvm_hv_sint_route_set_sint(sint_route); - break; - default: - break; - } + kvm_hv_sint_route_set_sint(sint_route->sint_route); } static uint64_t hv_test_dev_read(void *opaque, hwaddr addr, unsigned size) @@ -114,18 +106,20 @@ static void hv_test_dev_write(void *opaque, hwaddr addr, uint64_t data, uint32_t len) { 
HypervTestDev *dev = HYPERV_TEST_DEV(opaque); - uint8_t ctl; + uint8_t sint = data & 0xFF; + uint8_t vp_index = (data >> 8ULL) & 0xFF; + uint8_t ctl = (data >> 16ULL) & 0xFF; - ctl = (data >> 16ULL) & 0xFF; switch (ctl) { case HV_TEST_DEV_SINT_ROUTE_CREATE: - case HV_TEST_DEV_SINT_ROUTE_DESTROY: - case HV_TEST_DEV_SINT_ROUTE_SET_SINT: { - uint8_t sint = data & 0xFF; - uint8_t vp_index = (data >> 8ULL) & 0xFF; - hv_synic_test_dev_control(dev, ctl, vp_index, sint); + sint_route_create(dev, vp_index, sint); + break; + case HV_TEST_DEV_SINT_ROUTE_DESTROY: + sint_route_destroy(dev, vp_index, sint); + break; + case HV_TEST_DEV_SINT_ROUTE_SET_SINT: + sint_route_set_sint(dev, vp_index, sint); break; - } default: break; } @@ -145,7 +139,7 @@ static void hv_test_dev_realizefn(DeviceState *d, Error **errp) HypervTestDev *dev = HYPERV_TEST_DEV(d); MemoryRegion *io = isa_address_space_io(isa); - memset(dev->sint_route, 0, sizeof(dev->sint_route)); + QLIST_INIT(&dev->sint_routes); memory_region_init_io(&dev->sint_control, OBJECT(dev), &synic_test_sint_ops, dev, "hyperv-testdev-ctl", 4); From cc4669f06539679f2ed256b37dd741580add1ff9 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:18:30 +0300 Subject: [PATCH 24/47] hyperv_testdev: drop unnecessary includes Signed-off-by: Roman Kagan Message-Id: <20180921081836.29230-3-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/misc/hyperv_testdev.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/misc/hyperv_testdev.c b/hw/misc/hyperv_testdev.c index 915d74e177..7291fb1c62 100644 --- a/hw/misc/hyperv_testdev.c +++ b/hw/misc/hyperv_testdev.c @@ -13,13 +13,9 @@ #include "qemu/osdep.h" #include "qemu/queue.h" -#include -#include "hw/hw.h" #include "hw/qdev.h" #include "hw/isa/isa.h" -#include "sysemu/kvm.h" #include "target/i386/hyperv.h" -#include "kvm_i386.h" typedef struct TestSintRoute { QLIST_ENTRY(TestSintRoute) le; From 42e4b0e1fb7f3ac793fe48adf2f1ad95b5782a6b Mon Sep 17 00:00:00 2001 From: Roman 
Kagan Date: Fri, 21 Sep 2018 11:18:31 +0300 Subject: [PATCH 25/47] hyperv: cosmetic: g_malloc -> g_new Signed-off-by: Roman Kagan Message-Id: <20180921081836.29230-4-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- target/i386/hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 3065d765ed..47368b77c0 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -88,7 +88,7 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, HvSintRoute *sint_route; int r, gsi; - sint_route = g_malloc0(sizeof(*sint_route)); + sint_route = g_new0(HvSintRoute, 1); r = event_notifier_init(&sint_route->sint_set_notifier, false); if (r) { goto err; From bd4ed63caafe0bd14393db0d7ef3a70860ec6f2d Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:18:32 +0300 Subject: [PATCH 26/47] hyperv: synic: only setup ack notifier if there's a callback There's no point setting up an sint ack notifier if no callback is specified. 
Signed-off-by: Roman Kagan Message-Id: <20180921081836.29230-5-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- target/i386/hyperv.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 47368b77c0..acdb0ca9df 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -77,15 +77,14 @@ static void kvm_hv_sint_ack_handler(EventNotifier *notifier) HvSintRoute *sint_route = container_of(notifier, HvSintRoute, sint_ack_notifier); event_notifier_test_and_clear(notifier); - if (sint_route->sint_ack_clb) { - sint_route->sint_ack_clb(sint_route); - } + sint_route->sint_ack_clb(sint_route); } HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, HvSintAckClb sint_ack_clb) { HvSintRoute *sint_route; + EventNotifier *ack_notifier; int r, gsi; sint_route = g_new0(HvSintRoute, 1); @@ -94,13 +93,15 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, goto err; } - r = event_notifier_init(&sint_route->sint_ack_notifier, false); - if (r) { - goto err_sint_set_notifier; - } + ack_notifier = sint_ack_clb ? 
&sint_route->sint_ack_notifier : NULL; + if (ack_notifier) { + r = event_notifier_init(ack_notifier, false); + if (r) { + goto err_sint_set_notifier; + } - event_notifier_set_handler(&sint_route->sint_ack_notifier, - kvm_hv_sint_ack_handler); + event_notifier_set_handler(ack_notifier, kvm_hv_sint_ack_handler); + } gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); if (gsi < 0) { @@ -109,7 +110,7 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &sint_route->sint_set_notifier, - &sint_route->sint_ack_notifier, gsi); + ack_notifier, gsi); if (r) { goto err_irqfd; } @@ -123,8 +124,10 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, err_irqfd: kvm_irqchip_release_virq(kvm_state, gsi); err_gsi: - event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); - event_notifier_cleanup(&sint_route->sint_ack_notifier); + if (ack_notifier) { + event_notifier_set_handler(ack_notifier, NULL); + event_notifier_cleanup(ack_notifier); + } err_sint_set_notifier: event_notifier_cleanup(&sint_route->sint_set_notifier); err: @@ -139,8 +142,10 @@ void kvm_hv_sint_route_destroy(HvSintRoute *sint_route) &sint_route->sint_set_notifier, sint_route->gsi); kvm_irqchip_release_virq(kvm_state, sint_route->gsi); - event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); - event_notifier_cleanup(&sint_route->sint_ack_notifier); + if (sint_route->sint_ack_clb) { + event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); + event_notifier_cleanup(&sint_route->sint_ack_notifier); + } event_notifier_cleanup(&sint_route->sint_set_notifier); g_free(sint_route); } From b56920245c1f708976b6f1bf8ddc4a1e9bd33059 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:18:33 +0300 Subject: [PATCH 27/47] hyperv: allow passing arbitrary data to sint ack callback Make sint ack callback accept an opaque pointer, that is stored on sint_route at creation time. 
This allows for more convenient interaction with the callback. Besides, nothing outside hyperv.c should need to know the layout of HvSintRoute fields any more so its declaration can be removed from the header. Signed-off-by: Roman Kagan Message-Id: <20180921081836.29230-6-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/misc/hyperv_testdev.c | 2 +- target/i386/hyperv.c | 16 ++++++++++++++-- target/i386/hyperv.h | 15 +++------------ 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/hw/misc/hyperv_testdev.c b/hw/misc/hyperv_testdev.c index 7291fb1c62..1f32d3c9dd 100644 --- a/hw/misc/hyperv_testdev.c +++ b/hw/misc/hyperv_testdev.c @@ -52,7 +52,7 @@ static void sint_route_create(HypervTestDev *dev, sint_route->vp_index = vp_index; sint_route->sint = sint; - sint_route->sint_route = kvm_hv_sint_route_create(vp_index, sint, NULL); + sint_route->sint_route = kvm_hv_sint_route_create(vp_index, sint, NULL, NULL); assert(sint_route->sint_route); QLIST_INSERT_HEAD(&dev->sint_routes, sint_route, le); diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index acdb0ca9df..11fd1add2c 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -16,6 +16,16 @@ #include "hyperv.h" #include "hyperv-proto.h" +struct HvSintRoute { + uint32_t sint; + uint32_t vp_index; + int gsi; + EventNotifier sint_set_notifier; + EventNotifier sint_ack_notifier; + HvSintAckClb sint_ack_clb; + void *sint_ack_clb_data; +}; + uint32_t hyperv_vp_index(X86CPU *cpu) { return CPU(cpu)->cpu_index; @@ -77,11 +87,12 @@ static void kvm_hv_sint_ack_handler(EventNotifier *notifier) HvSintRoute *sint_route = container_of(notifier, HvSintRoute, sint_ack_notifier); event_notifier_test_and_clear(notifier); - sint_route->sint_ack_clb(sint_route); + sint_route->sint_ack_clb(sint_route->sint_ack_clb_data); } HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb) + HvSintAckClb sint_ack_clb, + void *sint_ack_clb_data) { HvSintRoute *sint_route; 
EventNotifier *ack_notifier; @@ -116,6 +127,7 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, } sint_route->gsi = gsi; sint_route->sint_ack_clb = sint_ack_clb; + sint_route->sint_ack_clb_data = sint_ack_clb_data; sint_route->vp_index = vp_index; sint_route->sint = sint; diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h index 00c9b454bb..ab99047bf8 100644 --- a/target/i386/hyperv.h +++ b/target/i386/hyperv.h @@ -16,24 +16,15 @@ #include "cpu.h" #include "sysemu/kvm.h" -#include "qemu/event_notifier.h" typedef struct HvSintRoute HvSintRoute; -typedef void (*HvSintAckClb)(HvSintRoute *sint_route); - -struct HvSintRoute { - uint32_t sint; - uint32_t vp_index; - int gsi; - EventNotifier sint_set_notifier; - EventNotifier sint_ack_notifier; - HvSintAckClb sint_ack_clb; -}; +typedef void (*HvSintAckClb)(void *data); int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit); HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb); + HvSintAckClb sint_ack_clb, + void *sint_ack_clb_data); void kvm_hv_sint_route_destroy(HvSintRoute *sint_route); From 09cfb2f6350512eb3ef7e9e2136daabbcf265880 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:18:34 +0300 Subject: [PATCH 28/47] hyperv: address HvSintRoute by X86CPU pointer Use X86CPU pointer to refer to the respective HvSintRoute instead of vp_index. This is more convenient and also paves the way for future enhancements. 
Signed-off-by: Roman Kagan Message-Id: <20180921081836.29230-7-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- target/i386/hyperv.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 11fd1add2c..0ce8a7aa2f 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -18,7 +18,7 @@ struct HvSintRoute { uint32_t sint; - uint32_t vp_index; + X86CPU *cpu; int gsi; EventNotifier sint_set_notifier; EventNotifier sint_ack_notifier; @@ -97,6 +97,12 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, HvSintRoute *sint_route; EventNotifier *ack_notifier; int r, gsi; + X86CPU *cpu; + + cpu = hyperv_find_vcpu(vp_index); + if (!cpu) { + return NULL; + } sint_route = g_new0(HvSintRoute, 1); r = event_notifier_init(&sint_route->sint_set_notifier, false); @@ -128,7 +134,7 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, sint_route->gsi = gsi; sint_route->sint_ack_clb = sint_ack_clb; sint_route->sint_ack_clb_data = sint_ack_clb_data; - sint_route->vp_index = vp_index; + sint_route->cpu = cpu; sint_route->sint = sint; return sint_route; From 3d3e6e85c3ebd5cd551133af9bace084360307a3 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:18:35 +0300 Subject: [PATCH 29/47] hyperv: make HvSintRoute reference-counted Multiple entities (e.g. VMBus devices) can use the same SINT route. To make their lives easier in maintaining SINT route ownership, make it reference-counted. Adjust the respective API names accordingly. 
Signed-off-by: Roman Kagan Message-Id: <20180921081836.29230-8-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/misc/hyperv_testdev.c | 4 ++-- target/i386/hyperv.c | 25 +++++++++++++++++++++---- target/i386/hyperv.h | 10 +++++----- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/hw/misc/hyperv_testdev.c b/hw/misc/hyperv_testdev.c index 1f32d3c9dd..dbf4e7e4ab 100644 --- a/hw/misc/hyperv_testdev.c +++ b/hw/misc/hyperv_testdev.c @@ -52,7 +52,7 @@ static void sint_route_create(HypervTestDev *dev, sint_route->vp_index = vp_index; sint_route->sint = sint; - sint_route->sint_route = kvm_hv_sint_route_create(vp_index, sint, NULL, NULL); + sint_route->sint_route = hyperv_sint_route_new(vp_index, sint, NULL, NULL); assert(sint_route->sint_route); QLIST_INSERT_HEAD(&dev->sint_routes, sint_route, le); @@ -79,7 +79,7 @@ static void sint_route_destroy(HypervTestDev *dev, sint_route = sint_route_find(dev, vp_index, sint); QLIST_REMOVE(sint_route, le); - kvm_hv_sint_route_destroy(sint_route->sint_route); + hyperv_sint_route_unref(sint_route->sint_route); g_free(sint_route); } diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 0ce8a7aa2f..4d8ef6f2da 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -24,6 +24,7 @@ struct HvSintRoute { EventNotifier sint_ack_notifier; HvSintAckClb sint_ack_clb; void *sint_ack_clb_data; + unsigned refcount; }; uint32_t hyperv_vp_index(X86CPU *cpu) @@ -90,9 +91,9 @@ static void kvm_hv_sint_ack_handler(EventNotifier *notifier) sint_route->sint_ack_clb(sint_route->sint_ack_clb_data); } -HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb, - void *sint_ack_clb_data) +HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, + HvSintAckClb sint_ack_clb, + void *sint_ack_clb_data) { HvSintRoute *sint_route; EventNotifier *ack_notifier; @@ -136,6 +137,7 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, 
sint_route->sint_ack_clb_data = sint_ack_clb_data; sint_route->cpu = cpu; sint_route->sint = sint; + sint_route->refcount = 1; return sint_route; @@ -154,8 +156,23 @@ err: return NULL; } -void kvm_hv_sint_route_destroy(HvSintRoute *sint_route) +void hyperv_sint_route_ref(HvSintRoute *sint_route) { + sint_route->refcount++; +} + +void hyperv_sint_route_unref(HvSintRoute *sint_route) +{ + if (!sint_route) { + return; + } + + assert(sint_route->refcount > 0); + + if (--sint_route->refcount) { + return; + } + kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &sint_route->sint_set_notifier, sint_route->gsi); diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h index ab99047bf8..cdf44a7757 100644 --- a/target/i386/hyperv.h +++ b/target/i386/hyperv.h @@ -22,11 +22,11 @@ typedef void (*HvSintAckClb)(void *data); int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit); -HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb, - void *sint_ack_clb_data); - -void kvm_hv_sint_route_destroy(HvSintRoute *sint_route); +HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, + HvSintAckClb sint_ack_clb, + void *sint_ack_clb_data); +void hyperv_sint_route_ref(HvSintRoute *sint_route); +void hyperv_sint_route_unref(HvSintRoute *sint_route); int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route); From 2486cb8eba919980091c754256cd014342738a6a Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:18:36 +0300 Subject: [PATCH 30/47] hyperv: rename kvm_hv_sint_route_set_sint There's nothing kvm-specific in it so follow suit and replace "kvm_hv" prefix with "hyperv". 
Signed-off-by: Roman Kagan Message-Id: <20180921081836.29230-9-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/misc/hyperv_testdev.c | 2 +- target/i386/hyperv.c | 2 +- target/i386/hyperv.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/misc/hyperv_testdev.c b/hw/misc/hyperv_testdev.c index dbf4e7e4ab..33bbd286bc 100644 --- a/hw/misc/hyperv_testdev.c +++ b/hw/misc/hyperv_testdev.c @@ -90,7 +90,7 @@ static void sint_route_set_sint(HypervTestDev *dev, sint_route = sint_route_find(dev, vp_index, sint); - kvm_hv_sint_route_set_sint(sint_route->sint_route); + hyperv_sint_route_set_sint(sint_route->sint_route); } static uint64_t hv_test_dev_read(void *opaque, hwaddr addr, unsigned size) diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 4d8ef6f2da..fc537e7ca0 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -185,7 +185,7 @@ void hyperv_sint_route_unref(HvSintRoute *sint_route) g_free(sint_route); } -int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route) +int hyperv_sint_route_set_sint(HvSintRoute *sint_route) { return event_notifier_set(&sint_route->sint_set_notifier); } diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h index cdf44a7757..59e9f9a1e1 100644 --- a/target/i386/hyperv.h +++ b/target/i386/hyperv.h @@ -28,7 +28,7 @@ HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, void hyperv_sint_route_ref(HvSintRoute *sint_route); void hyperv_sint_route_unref(HvSintRoute *sint_route); -int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route); +int hyperv_sint_route_set_sint(HvSintRoute *sint_route); uint32_t hyperv_vp_index(X86CPU *cpu); X86CPU *hyperv_find_vcpu(uint32_t vp_index); From 5116122af70357d895ecc61c0211dbf786226081 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:20:37 +0300 Subject: [PATCH 31/47] hyperv: split hyperv-proto.h into x86 and arch-independent parts Some parts of the Hyper-V hypervisor-guest interface appear to be target-independent, so move them 
into a proper header. Not that Hyper-V ARM64 emulation is around the corner but it seems more convenient to have most of Hyper-V and VMBus target-independent, and allows to avoid conflicts with inclusion of arch-specific headers down the road in VMBus implementation. Signed-off-by: Roman Kagan Message-Id: <20180921082041.29380-2-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- include/hw/hyperv/hyperv-proto.h | 129 +++++++++++++++++++++++++++++++ target/i386/hyperv-proto.h | 115 +-------------------------- 2 files changed, 132 insertions(+), 112 deletions(-) create mode 100644 include/hw/hyperv/hyperv-proto.h diff --git a/include/hw/hyperv/hyperv-proto.h b/include/hw/hyperv/hyperv-proto.h new file mode 100644 index 0000000000..2dc78eeafb --- /dev/null +++ b/include/hw/hyperv/hyperv-proto.h @@ -0,0 +1,129 @@ +/* + * Definitions for Hyper-V guest/hypervisor interaction + * + * Copyright (c) 2017-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory.
+ */ + +#ifndef HW_HYPERV_HYPERV_PROTO_H +#define HW_HYPERV_HYPERV_PROTO_H + +#include "qemu/bitmap.h" + +/* + * Hypercall status code + */ +#define HV_STATUS_SUCCESS 0 +#define HV_STATUS_INVALID_HYPERCALL_CODE 2 +#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 +#define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INVALID_PARAMETER 5 +#define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_CONNECTION_ID 18 +#define HV_STATUS_INSUFFICIENT_BUFFERS 19 + +/* + * Hypercall numbers + */ +#define HV_POST_MESSAGE 0x005c +#define HV_SIGNAL_EVENT 0x005d +#define HV_HYPERCALL_FAST (1u << 16) + +/* + * Message size + */ +#define HV_MESSAGE_PAYLOAD_SIZE 240 + +/* + * Message types + */ +#define HV_MESSAGE_NONE 0x00000000 +#define HV_MESSAGE_VMBUS 0x00000001 +#define HV_MESSAGE_UNMAPPED_GPA 0x80000000 +#define HV_MESSAGE_GPA_INTERCEPT 0x80000001 +#define HV_MESSAGE_TIMER_EXPIRED 0x80000010 +#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE 0x80000020 +#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION 0x80000021 +#define HV_MESSAGE_UNSUPPORTED_FEATURE 0x80000022 +#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE 0x80000040 +#define HV_MESSAGE_X64_IOPORT_INTERCEPT 0x80010000 +#define HV_MESSAGE_X64_MSR_INTERCEPT 0x80010001 +#define HV_MESSAGE_X64_CPUID_INTERCEPT 0x80010002 +#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT 0x80010003 +#define HV_MESSAGE_X64_APIC_EOI 0x80010004 +#define HV_MESSAGE_X64_LEGACY_FP_ERROR 0x80010005 + +/* + * Message flags + */ +#define HV_MESSAGE_FLAG_PENDING 0x1 + +/* + * Number of synthetic interrupts + */ +#define HV_SINT_COUNT 16 + +/* + * Event flags number per SINT + */ +#define HV_EVENT_FLAGS_COUNT (256 * 8) + +/* + * Connection id valid bits + */ +#define HV_CONNECTION_ID_MASK 0x00ffffff + +/* + * Input structure for POST_MESSAGE hypercall + */ +struct hyperv_post_message_input { + uint32_t connection_id; + uint32_t _reserved; + uint32_t message_type; + uint32_t payload_size; + uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; +}; + +/* + * Input structure for 
SIGNAL_EVENT hypercall + */ +struct hyperv_signal_event_input { + uint32_t connection_id; + uint16_t flag_number; + uint16_t _reserved_zero; +}; + +/* + * SynIC message structures + */ +struct hyperv_message_header { + uint32_t message_type; + uint8_t payload_size; + uint8_t message_flags; /* HV_MESSAGE_FLAG_XX */ + uint8_t _reserved[2]; + uint64_t sender; +}; + +struct hyperv_message { + struct hyperv_message_header header; + uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; +}; + +struct hyperv_message_page { + struct hyperv_message slot[HV_SINT_COUNT]; +}; + +/* + * SynIC event flags structures + */ +struct hyperv_event_flags { + DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT); +}; + +struct hyperv_event_flags_page { + struct hyperv_event_flags slot[HV_SINT_COUNT]; +}; + +#endif diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index 87f36d14e8..8c572cd7c2 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -1,7 +1,7 @@ /* - * Definitions for Hyper-V guest/hypervisor interaction + * Definitions for Hyper-V guest/hypervisor interaction - x86-specific part * - * Copyright (C) 2017 Parallels International GmbH + * Copyright (c) 2017-2018 Virtuozzo International GmbH. * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. 
@@ -10,7 +10,7 @@ #ifndef TARGET_I386_HYPERV_PROTO_H #define TARGET_I386_HYPERV_PROTO_H -#include "qemu/bitmap.h" +#include "hw/hyperv/hyperv-proto.h" #define HV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 #define HV_CPUID_INTERFACE 0x40000001 @@ -138,25 +138,6 @@ #define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 #define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 -/* - * Hypercall status code - */ -#define HV_STATUS_SUCCESS 0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 -#define HV_STATUS_INVALID_ALIGNMENT 4 -#define HV_STATUS_INVALID_PARAMETER 5 -#define HV_STATUS_INSUFFICIENT_MEMORY 11 -#define HV_STATUS_INVALID_CONNECTION_ID 18 -#define HV_STATUS_INSUFFICIENT_BUFFERS 19 - -/* - * Hypercall numbers - */ -#define HV_POST_MESSAGE 0x005c -#define HV_SIGNAL_EVENT 0x005d -#define HV_HYPERCALL_FAST (1u << 16) - /* * Hypercall MSR bits */ @@ -166,7 +147,6 @@ * Synthetic interrupt controller definitions */ #define HV_SYNIC_VERSION 1 -#define HV_SINT_COUNT 16 #define HV_SYNIC_ENABLE (1u << 0) #define HV_SIMP_ENABLE (1u << 0) #define HV_SIEFP_ENABLE (1u << 0) @@ -176,94 +156,5 @@ #define HV_STIMER_COUNT 4 -/* - * Message size - */ -#define HV_MESSAGE_PAYLOAD_SIZE 240 - -/* - * Message types - */ -#define HV_MESSAGE_NONE 0x00000000 -#define HV_MESSAGE_VMBUS 0x00000001 -#define HV_MESSAGE_UNMAPPED_GPA 0x80000000 -#define HV_MESSAGE_GPA_INTERCEPT 0x80000001 -#define HV_MESSAGE_TIMER_EXPIRED 0x80000010 -#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE 0x80000020 -#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION 0x80000021 -#define HV_MESSAGE_UNSUPPORTED_FEATURE 0x80000022 -#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE 0x80000040 -#define HV_MESSAGE_X64_IOPORT_INTERCEPT 0x80010000 -#define HV_MESSAGE_X64_MSR_INTERCEPT 0x80010001 -#define HV_MESSAGE_X64_CPUID_INTERCEPT 0x80010002 -#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT 0x80010003 -#define HV_MESSAGE_X64_APIC_EOI 0x80010004 -#define HV_MESSAGE_X64_LEGACY_FP_ERROR 0x80010005 - -/* - * Message 
flags - */ -#define HV_MESSAGE_FLAG_PENDING 0x1 - -/* - * Event flags number per SINT - */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) - -/* - * Connection id valid bits - */ -#define HV_CONNECTION_ID_MASK 0x00ffffff - -/* - * Input structure for POST_MESSAGE hypercall - */ -struct hyperv_post_message_input { - uint32_t connection_id; - uint32_t _reserved; - uint32_t message_type; - uint32_t payload_size; - uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; -}; - -/* - * Input structure for SIGNAL_EVENT hypercall - */ -struct hyperv_signal_event_input { - uint32_t connection_id; - uint16_t flag_number; - uint16_t _reserved_zero; -}; - -/* - * SynIC message structures - */ -struct hyperv_message_header { - uint32_t message_type; - uint8_t payload_size; - uint8_t message_flags; /* HV_MESSAGE_FLAG_XX */ - uint8_t _reserved[2]; - uint64_t sender; -}; - -struct hyperv_message { - struct hyperv_message_header header; - uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; -}; - -struct hyperv_message_page { - struct hyperv_message slot[HV_SINT_COUNT]; -}; - -/* - * SynIC event flags structures - */ -struct hyperv_event_flags { - DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT); -}; - -struct hyperv_event_flags_page { - struct hyperv_event_flags slot[HV_SINT_COUNT]; -}; #endif From 51f0ac63812abf41d3a9284a272c6275d7318705 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:20:38 +0300 Subject: [PATCH 32/47] hyperv: make hyperv_vp_index inline Also make the inverse function, hyperv_find_vcpu, static as it's not used outside hyperv.c This paves the way to making hyperv.c built optionally. 
Signed-off-by: Roman Kagan Message-Id: <20180921082041.29380-3-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- target/i386/hyperv.c | 11 ++++------- target/i386/hyperv.h | 6 ++++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index fc537e7ca0..68816642c9 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -27,14 +27,11 @@ struct HvSintRoute { unsigned refcount; }; -uint32_t hyperv_vp_index(X86CPU *cpu) +static X86CPU *hyperv_find_vcpu(uint32_t vp_index) { - return CPU(cpu)->cpu_index; -} - -X86CPU *hyperv_find_vcpu(uint32_t vp_index) -{ - return X86_CPU(qemu_get_cpu(vp_index)); + X86CPU *cpu = X86_CPU(qemu_get_cpu(vp_index)); + assert(hyperv_vp_index(cpu) == vp_index); + return cpu; } int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h index 59e9f9a1e1..8d4619c078 100644 --- a/target/i386/hyperv.h +++ b/target/i386/hyperv.h @@ -30,7 +30,9 @@ void hyperv_sint_route_unref(HvSintRoute *sint_route); int hyperv_sint_route_set_sint(HvSintRoute *sint_route); -uint32_t hyperv_vp_index(X86CPU *cpu); -X86CPU *hyperv_find_vcpu(uint32_t vp_index); +static inline uint32_t hyperv_vp_index(X86CPU *cpu) +{ + return CPU(cpu)->cpu_index; +} #endif From 701189e31140a7c82ec02a7f4ca632cfd6a8559d Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:20:39 +0300 Subject: [PATCH 33/47] hyperv: factor out arch-independent API into hw/hyperv A significant part of hyperv.c is not actually tied to x86, and can be moved to hw/. This will allow to maintain most of Hyper-V and VMBus target-independent, and to avoid conflicts with inclusion of arch-specific headers down the road in VMBus implementation. Also this stuff can now be opt-out with CONFIG_HYPERV. 
Signed-off-by: Roman Kagan Message-Id: <20180921082041.29380-4-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/Makefile.objs | 1 + hw/hyperv/Makefile.objs | 2 + hw/hyperv/hyperv.c | 138 +++++++++++++++++++++++++++ hw/{misc => hyperv}/hyperv_testdev.c | 2 +- hw/misc/Makefile.objs | 1 - include/hw/hyperv/hyperv.h | 31 ++++++ target/i386/hyperv.c | 127 +----------------------- target/i386/hyperv.h | 17 +--- target/i386/kvm.c | 5 +- 9 files changed, 178 insertions(+), 146 deletions(-) create mode 100644 hw/hyperv/Makefile.objs create mode 100644 hw/hyperv/hyperv.c rename hw/{misc => hyperv}/hyperv_testdev.c (99%) create mode 100644 include/hw/hyperv/hyperv.h diff --git a/hw/Makefile.objs b/hw/Makefile.objs index a19c1417ed..30722ccf98 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -9,6 +9,7 @@ devices-dirs-$(CONFIG_SOFTMMU) += cpu/ devices-dirs-$(CONFIG_SOFTMMU) += display/ devices-dirs-$(CONFIG_SOFTMMU) += dma/ devices-dirs-$(CONFIG_SOFTMMU) += gpio/ +devices-dirs-$(CONFIG_HYPERV) += hyperv/ devices-dirs-$(CONFIG_SOFTMMU) += i2c/ devices-dirs-$(CONFIG_SOFTMMU) += ide/ devices-dirs-$(CONFIG_SOFTMMU) += input/ diff --git a/hw/hyperv/Makefile.objs b/hw/hyperv/Makefile.objs new file mode 100644 index 0000000000..edaca2f763 --- /dev/null +++ b/hw/hyperv/Makefile.objs @@ -0,0 +1,2 @@ +obj-y += hyperv.o +obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c new file mode 100644 index 0000000000..97db87561e --- /dev/null +++ b/hw/hyperv/hyperv.c @@ -0,0 +1,138 @@ +/* + * Hyper-V guest/hypervisor interaction + * + * Copyright (c) 2015-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "sysemu/kvm.h" +#include "hw/hyperv/hyperv.h" + +struct HvSintRoute { + uint32_t sint; + CPUState *cs; + int gsi; + EventNotifier sint_set_notifier; + EventNotifier sint_ack_notifier; + HvSintAckClb sint_ack_clb; + void *sint_ack_clb_data; + unsigned refcount; +}; + +static CPUState *hyperv_find_vcpu(uint32_t vp_index) +{ + CPUState *cs = qemu_get_cpu(vp_index); + assert(hyperv_vp_index(cs) == vp_index); + return cs; +} + +static void kvm_hv_sint_ack_handler(EventNotifier *notifier) +{ + HvSintRoute *sint_route = container_of(notifier, HvSintRoute, + sint_ack_notifier); + event_notifier_test_and_clear(notifier); + sint_route->sint_ack_clb(sint_route->sint_ack_clb_data); +} + +HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, + HvSintAckClb sint_ack_clb, + void *sint_ack_clb_data) +{ + HvSintRoute *sint_route; + EventNotifier *ack_notifier; + int r, gsi; + CPUState *cs; + + cs = hyperv_find_vcpu(vp_index); + if (!cs) { + return NULL; + } + + sint_route = g_new0(HvSintRoute, 1); + r = event_notifier_init(&sint_route->sint_set_notifier, false); + if (r) { + goto err; + } + + ack_notifier = sint_ack_clb ? 
&sint_route->sint_ack_notifier : NULL; + if (ack_notifier) { + r = event_notifier_init(ack_notifier, false); + if (r) { + goto err_sint_set_notifier; + } + + event_notifier_set_handler(ack_notifier, kvm_hv_sint_ack_handler); + } + + gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); + if (gsi < 0) { + goto err_gsi; + } + + r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, + &sint_route->sint_set_notifier, + ack_notifier, gsi); + if (r) { + goto err_irqfd; + } + sint_route->gsi = gsi; + sint_route->sint_ack_clb = sint_ack_clb; + sint_route->sint_ack_clb_data = sint_ack_clb_data; + sint_route->cs = cs; + sint_route->sint = sint; + sint_route->refcount = 1; + + return sint_route; + +err_irqfd: + kvm_irqchip_release_virq(kvm_state, gsi); +err_gsi: + if (ack_notifier) { + event_notifier_set_handler(ack_notifier, NULL); + event_notifier_cleanup(ack_notifier); + } +err_sint_set_notifier: + event_notifier_cleanup(&sint_route->sint_set_notifier); +err: + g_free(sint_route); + + return NULL; +} + +void hyperv_sint_route_ref(HvSintRoute *sint_route) +{ + sint_route->refcount++; +} + +void hyperv_sint_route_unref(HvSintRoute *sint_route) +{ + if (!sint_route) { + return; + } + + assert(sint_route->refcount > 0); + + if (--sint_route->refcount) { + return; + } + + kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, + &sint_route->sint_set_notifier, + sint_route->gsi); + kvm_irqchip_release_virq(kvm_state, sint_route->gsi); + if (sint_route->sint_ack_clb) { + event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); + event_notifier_cleanup(&sint_route->sint_ack_notifier); + } + event_notifier_cleanup(&sint_route->sint_set_notifier); + g_free(sint_route); +} + +int hyperv_sint_route_set_sint(HvSintRoute *sint_route) +{ + return event_notifier_set(&sint_route->sint_set_notifier); +} diff --git a/hw/misc/hyperv_testdev.c b/hw/hyperv/hyperv_testdev.c similarity index 99% rename from hw/misc/hyperv_testdev.c rename to hw/hyperv/hyperv_testdev.c index 
33bbd286bc..fc3f6c5666 100644 --- a/hw/misc/hyperv_testdev.c +++ b/hw/hyperv/hyperv_testdev.c @@ -15,7 +15,7 @@ #include "qemu/queue.h" #include "hw/qdev.h" #include "hw/isa/isa.h" -#include "target/i386/hyperv.h" +#include "hw/hyperv/hyperv.h" typedef struct TestSintRoute { QLIST_ENTRY(TestSintRoute) le; diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index 6d50b03cfd..680350b3c3 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -71,7 +71,6 @@ obj-$(CONFIG_IOTKIT_SYSCTL) += iotkit-sysctl.o obj-$(CONFIG_IOTKIT_SYSINFO) += iotkit-sysinfo.o obj-$(CONFIG_PVPANIC) += pvpanic.o -obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o obj-$(CONFIG_AUX) += auxbus.o obj-$(CONFIG_ASPEED_SOC) += aspeed_scu.o aspeed_sdmc.o obj-$(CONFIG_MSF2) += msf2-sysreg.o diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h new file mode 100644 index 0000000000..d6c8d78353 --- /dev/null +++ b/include/hw/hyperv/hyperv.h @@ -0,0 +1,31 @@ +/* + * Hyper-V guest/hypervisor interaction + * + * Copyright (c) 2015-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef HW_HYPERV_HYPERV_H +#define HW_HYPERV_HYPERV_H + +#include "cpu-qom.h" + +typedef struct HvSintRoute HvSintRoute; +typedef void (*HvSintAckClb)(void *data); + +HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, + HvSintAckClb sint_ack_clb, + void *sint_ack_clb_data); +void hyperv_sint_route_ref(HvSintRoute *sint_route); +void hyperv_sint_route_unref(HvSintRoute *sint_route); + +int hyperv_sint_route_set_sint(HvSintRoute *sint_route); + +static inline uint32_t hyperv_vp_index(CPUState *cs) +{ + return cs->cpu_index; +} + +#endif diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 68816642c9..1eac727774 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -12,28 +12,10 @@ */ #include "qemu/osdep.h" -#include "qemu/main-loop.h" #include "hyperv.h" +#include "hw/hyperv/hyperv.h" #include "hyperv-proto.h" -struct HvSintRoute { - uint32_t sint; - X86CPU *cpu; - int gsi; - EventNotifier sint_set_notifier; - EventNotifier sint_ack_notifier; - HvSintAckClb sint_ack_clb; - void *sint_ack_clb_data; - unsigned refcount; -}; - -static X86CPU *hyperv_find_vcpu(uint32_t vp_index) -{ - X86CPU *cpu = X86_CPU(qemu_get_cpu(vp_index)); - assert(hyperv_vp_index(cpu) == vp_index); - return cpu; -} - int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) { CPUX86State *env = &cpu->env; @@ -79,110 +61,3 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) return -1; } } - -static void kvm_hv_sint_ack_handler(EventNotifier *notifier) -{ - HvSintRoute *sint_route = container_of(notifier, HvSintRoute, - sint_ack_notifier); - event_notifier_test_and_clear(notifier); - sint_route->sint_ack_clb(sint_route->sint_ack_clb_data); -} - -HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb, - void *sint_ack_clb_data) -{ - HvSintRoute *sint_route; - EventNotifier *ack_notifier; - int r, gsi; - X86CPU *cpu; - - cpu = hyperv_find_vcpu(vp_index); - if (!cpu) { - return NULL; - } 
- - sint_route = g_new0(HvSintRoute, 1); - r = event_notifier_init(&sint_route->sint_set_notifier, false); - if (r) { - goto err; - } - - ack_notifier = sint_ack_clb ? &sint_route->sint_ack_notifier : NULL; - if (ack_notifier) { - r = event_notifier_init(ack_notifier, false); - if (r) { - goto err_sint_set_notifier; - } - - event_notifier_set_handler(ack_notifier, kvm_hv_sint_ack_handler); - } - - gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); - if (gsi < 0) { - goto err_gsi; - } - - r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, - &sint_route->sint_set_notifier, - ack_notifier, gsi); - if (r) { - goto err_irqfd; - } - sint_route->gsi = gsi; - sint_route->sint_ack_clb = sint_ack_clb; - sint_route->sint_ack_clb_data = sint_ack_clb_data; - sint_route->cpu = cpu; - sint_route->sint = sint; - sint_route->refcount = 1; - - return sint_route; - -err_irqfd: - kvm_irqchip_release_virq(kvm_state, gsi); -err_gsi: - if (ack_notifier) { - event_notifier_set_handler(ack_notifier, NULL); - event_notifier_cleanup(ack_notifier); - } -err_sint_set_notifier: - event_notifier_cleanup(&sint_route->sint_set_notifier); -err: - g_free(sint_route); - - return NULL; -} - -void hyperv_sint_route_ref(HvSintRoute *sint_route) -{ - sint_route->refcount++; -} - -void hyperv_sint_route_unref(HvSintRoute *sint_route) -{ - if (!sint_route) { - return; - } - - assert(sint_route->refcount > 0); - - if (--sint_route->refcount) { - return; - } - - kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, - &sint_route->sint_set_notifier, - sint_route->gsi); - kvm_irqchip_release_virq(kvm_state, sint_route->gsi); - if (sint_route->sint_ack_clb) { - event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); - event_notifier_cleanup(&sint_route->sint_ack_notifier); - } - event_notifier_cleanup(&sint_route->sint_set_notifier); - g_free(sint_route); -} - -int hyperv_sint_route_set_sint(HvSintRoute *sint_route) -{ - return event_notifier_set(&sint_route->sint_set_notifier); -} diff --git 
a/target/i386/hyperv.h b/target/i386/hyperv.h index 8d4619c078..5c49251ecb 100644 --- a/target/i386/hyperv.h +++ b/target/i386/hyperv.h @@ -16,23 +16,8 @@ #include "cpu.h" #include "sysemu/kvm.h" - -typedef struct HvSintRoute HvSintRoute; -typedef void (*HvSintAckClb)(void *data); +#include "hw/hyperv/hyperv.h" int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit); -HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb, - void *sint_ack_clb_data); -void hyperv_sint_route_ref(HvSintRoute *sint_route); -void hyperv_sint_route_unref(HvSintRoute *sint_route); - -int hyperv_sint_route_set_sint(HvSintRoute *sint_route); - -static inline uint32_t hyperv_vp_index(X86CPU *cpu) -{ - return CPU(cpu)->cpu_index; -} - #endif diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 4e62b5c39b..b0b42d2991 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -774,7 +774,7 @@ static int hyperv_init_vcpu(X86CPU *cpu) } assert(ret == 1); - if (msr_data.entries[0].data != hyperv_vp_index(cpu)) { + if (msr_data.entries[0].data != hyperv_vp_index(CPU(cpu))) { error_report("kernel's vp_index != QEMU's vp_index"); return -ENXIO; } @@ -1949,7 +1949,8 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime); } if (cpu->hyperv_vpindex && hv_vpindex_settable) { - kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, hyperv_vp_index(cpu)); + kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, + hyperv_vp_index(CPU(cpu))); } if (cpu->hyperv_synic) { int j; From 02ef67240b4cd4a33e8e4d118fc827e6678b7d80 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:20:40 +0300 Subject: [PATCH 34/47] default-configs: collect CONFIG_HYPERV* in hyperv.mak Accumulate HYPERV config options in a dedicated file. There are only two so far; more will be added later. 
Signed-off-by: Roman Kagan Message-Id: <20180921082041.29380-5-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- default-configs/hyperv.mak | 2 ++ default-configs/i386-softmmu.mak | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 default-configs/hyperv.mak diff --git a/default-configs/hyperv.mak b/default-configs/hyperv.mak new file mode 100644 index 0000000000..5d0d9fd830 --- /dev/null +++ b/default-configs/hyperv.mak @@ -0,0 +1,2 @@ +CONFIG_HYPERV=$(CONFIG_KVM) +CONFIG_HYPERV_TESTDEV=y diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 8c7d4a0fa0..210cff2781 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -3,6 +3,7 @@ include pci.mak include sound.mak include usb.mak +include hyperv.mak CONFIG_QXL=$(CONFIG_SPICE) CONFIG_VGA_ISA=y CONFIG_VGA_CIRRUS=y @@ -58,7 +59,6 @@ CONFIG_XIO3130=y CONFIG_IOH3420=y CONFIG_I82801B11=y CONFIG_SMBIOS=y -CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) CONFIG_PXB=y CONFIG_ACPI_VMGENID=y CONFIG_FW_CFG_DMA=y From 8417442a34f78f1e1dda87aff27c8a42124f4d42 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:20:41 +0300 Subject: [PATCH 35/47] i386: add hyperv-stub for CONFIG_HYPERV=n This will allow to build slightly leaner QEMU that supports some HyperV features of KVM (e.g. SynIC timers, PV spinlocks, APIC assists, etc.) but nothing else on the QEMU side. 
Signed-off-by: Roman Kagan Message-Id: <20180921082041.29380-6-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- target/i386/Makefile.objs | 17 ++++++++++------- target/i386/hyperv-stub.c | 35 +++++++++++++++++++++++++++++++++++ target/i386/hyperv.h | 2 ++ 3 files changed, 47 insertions(+), 7 deletions(-) create mode 100644 target/i386/hyperv-stub.c diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs index 04678f5503..32bf966300 100644 --- a/target/i386/Makefile.objs +++ b/target/i386/Makefile.objs @@ -3,17 +3,20 @@ obj-$(CONFIG_TCG) += translate.o obj-$(CONFIG_TCG) += bpt_helper.o cc_helper.o excp_helper.o fpu_helper.o obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o mpx_helper.o obj-$(CONFIG_TCG) += seg_helper.o smm_helper.o svm_helper.o -obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o -obj-$(CONFIG_KVM) += kvm.o hyperv.o -obj-$(CONFIG_SEV) += sev.o +ifeq ($(CONFIG_SOFTMMU),y) +obj-y += machine.o arch_memory_mapping.o arch_dump.o monitor.o +obj-$(CONFIG_KVM) += kvm.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o -obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o -# HAX support -ifdef CONFIG_WIN32 +obj-$(CONFIG_HYPERV) += hyperv.o +obj-$(call lnot,$(CONFIG_HYPERV)) += hyperv-stub.o +ifeq ($(CONFIG_WIN32),y) obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-windows.o endif -ifdef CONFIG_DARWIN +ifeq ($(CONFIG_DARWIN),y) obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-darwin.o obj-$(CONFIG_HVF) += hvf/ endif obj-$(CONFIG_WHPX) += whpx-all.o +endif +obj-$(CONFIG_SEV) += sev.o +obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o diff --git a/target/i386/hyperv-stub.c b/target/i386/hyperv-stub.c new file mode 100644 index 0000000000..5919ba851c --- /dev/null +++ b/target/i386/hyperv-stub.c @@ -0,0 +1,35 @@ +/* + * Stubs for CONFIG_HYPERV=n + * + * Copyright (c) 2015-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. 
+ * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hyperv.h" + +#ifdef CONFIG_KVM +int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) +{ + switch (exit->type) { + case KVM_EXIT_HYPERV_SYNIC: + if (!cpu->hyperv_synic) { + return -1; + } + + /* + * Tracking the changes in the MSRs is unnecessary as there are no + * users for them beside save/load, which is handled nicely by the + * generic MSR save/load code + */ + return 0; + case KVM_EXIT_HYPERV_HCALL: + exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE; + return 0; + default: + return -1; + } +} +#endif diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h index 5c49251ecb..f0a27c3d73 100644 --- a/target/i386/hyperv.h +++ b/target/i386/hyperv.h @@ -18,6 +18,8 @@ #include "sysemu/kvm.h" #include "hw/hyperv/hyperv.h" +#ifdef CONFIG_KVM int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit); +#endif #endif From 729ce7e1b6b6e035012544f51878d1ef5864bf39 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:08 +0300 Subject: [PATCH 36/47] hyperv:synic: split capability testing and setting Put a bit more consistency into handling KVM_CAP_HYPERV_SYNIC capability, by checking its availability and determining the feasibility of hv-synic property first, and enabling it later. 
Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-2-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- target/i386/kvm.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index b0b42d2991..2e5b9f63eb 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -735,8 +735,9 @@ static int hyperv_handle_properties(CPUState *cs) } if (cpu->hyperv_synic) { if (!has_msr_hv_synic || - kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_SYNIC, 0)) { - fprintf(stderr, "Hyper-V SynIC is not supported by kernel\n"); + !kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_SYNIC)) { + fprintf(stderr, "Hyper-V SynIC (requested by 'hv-synic' cpu flag) " + "is not supported by kernel\n"); return -ENOSYS; } @@ -754,12 +755,14 @@ static int hyperv_handle_properties(CPUState *cs) static int hyperv_init_vcpu(X86CPU *cpu) { + CPUState *cs = CPU(cpu); + int ret; + if (cpu->hyperv_vpindex && !hv_vpindex_settable) { /* * the kernel doesn't support setting vp_index; assert that its value * is in sync */ - int ret; struct { struct kvm_msrs info; struct kvm_msr_entry entries[1]; @@ -768,7 +771,7 @@ static int hyperv_init_vcpu(X86CPU *cpu) .entries[0].index = HV_X64_MSR_VP_INDEX, }; - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); + ret = kvm_vcpu_ioctl(cs, KVM_GET_MSRS, &msr_data); if (ret < 0) { return ret; } @@ -780,6 +783,15 @@ static int hyperv_init_vcpu(X86CPU *cpu) } } + if (cpu->hyperv_synic) { + ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_SYNIC, 0); + if (ret < 0) { + error_report("failed to turn on HyperV SynIC in KVM: %s", + strerror(-ret)); + return ret; + } + } + return 0; } From 606c34bfd57a0ecda67b395bea022bb307a5384e Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:09 +0300 Subject: [PATCH 37/47] hyperv: qom-ify SynIC Make Hyper-V SynIC a device which is attached as a child to a CPU. 
For now it only makes SynIC visible in the qom hierarchy, and maintains its internal fields in sync with the respective msrs of the parent cpu (the fields will be used in followup patches). Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-3-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/hyperv/hyperv.c | 101 ++++++++++++++++++++++++++++++++++++- include/hw/hyperv/hyperv.h | 5 ++ target/i386/hyperv-stub.c | 13 +++++ target/i386/hyperv.c | 25 +++++++++ target/i386/hyperv.h | 4 ++ target/i386/kvm.c | 9 ++++ target/i386/machine.c | 9 ++++ 7 files changed, 164 insertions(+), 2 deletions(-) diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index 97db87561e..3d6f044282 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -9,12 +9,103 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" +#include "qapi/error.h" #include "sysemu/kvm.h" #include "hw/hyperv/hyperv.h" +typedef struct SynICState { + DeviceState parent_obj; + + CPUState *cs; + + bool enabled; + hwaddr msg_page_addr; + hwaddr event_page_addr; +} SynICState; + +#define TYPE_SYNIC "hyperv-synic" +#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC) + +static SynICState *get_synic(CPUState *cs) +{ + return SYNIC(object_resolve_path_component(OBJECT(cs), "synic")); +} + +static void synic_update(SynICState *synic, bool enable, + hwaddr msg_page_addr, hwaddr event_page_addr) +{ + + synic->enabled = enable; + synic->msg_page_addr = msg_page_addr; + synic->event_page_addr = event_page_addr; +} + +void hyperv_synic_update(CPUState *cs, bool enable, + hwaddr msg_page_addr, hwaddr event_page_addr) +{ + SynICState *synic = get_synic(cs); + + if (!synic) { + return; + } + + synic_update(synic, enable, msg_page_addr, event_page_addr); +} + +static void synic_realize(DeviceState *dev, Error **errp) +{ +} + +static void synic_reset(DeviceState *dev) +{ + SynICState *synic = SYNIC(dev); + synic_update(synic, false, 0, 0); +} + +static void synic_class_init(ObjectClass *klass, void
*data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = synic_realize; + dc->reset = synic_reset; + dc->user_creatable = false; +} + +void hyperv_synic_add(CPUState *cs) +{ + Object *obj; + SynICState *synic; + + obj = object_new(TYPE_SYNIC); + synic = SYNIC(obj); + synic->cs = cs; + object_property_add_child(OBJECT(cs), "synic", obj, &error_abort); + object_unref(obj); + object_property_set_bool(obj, true, "realized", &error_abort); +} + +void hyperv_synic_reset(CPUState *cs) +{ + device_reset(DEVICE(get_synic(cs))); +} + +static const TypeInfo synic_type_info = { + .name = TYPE_SYNIC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SynICState), + .class_init = synic_class_init, +}; + +static void synic_register_types(void) +{ + type_register_static(&synic_type_info); +} + +type_init(synic_register_types) + struct HvSintRoute { uint32_t sint; - CPUState *cs; + SynICState *synic; int gsi; EventNotifier sint_set_notifier; EventNotifier sint_ack_notifier; @@ -46,12 +137,18 @@ HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, EventNotifier *ack_notifier; int r, gsi; CPUState *cs; + SynICState *synic; cs = hyperv_find_vcpu(vp_index); if (!cs) { return NULL; } + synic = get_synic(cs); + if (!synic) { + return NULL; + } + sint_route = g_new0(HvSintRoute, 1); r = event_notifier_init(&sint_route->sint_set_notifier, false); if (r) { @@ -82,7 +179,7 @@ HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, sint_route->gsi = gsi; sint_route->sint_ack_clb = sint_ack_clb; sint_route->sint_ack_clb_data = sint_ack_clb_data; - sint_route->cs = cs; + sint_route->synic = synic; sint_route->sint = sint; sint_route->refcount = 1; diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index d6c8d78353..6fba4762c8 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -28,4 +28,9 @@ static inline uint32_t hyperv_vp_index(CPUState *cs) return cs->cpu_index; } +void hyperv_synic_add(CPUState *cs); +void 
hyperv_synic_reset(CPUState *cs); +void hyperv_synic_update(CPUState *cs, bool enable, + hwaddr msg_page_addr, hwaddr event_page_addr); + #endif diff --git a/target/i386/hyperv-stub.c b/target/i386/hyperv-stub.c index 5919ba851c..fe548cbae2 100644 --- a/target/i386/hyperv-stub.c +++ b/target/i386/hyperv-stub.c @@ -33,3 +33,16 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) } } #endif + +int hyperv_x86_synic_add(X86CPU *cpu) +{ + return -ENOSYS; +} + +void hyperv_x86_synic_reset(X86CPU *cpu) +{ +} + +void hyperv_x86_synic_update(X86CPU *cpu) +{ +} diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 1eac727774..0216735d67 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -16,6 +16,28 @@ #include "hw/hyperv/hyperv.h" #include "hyperv-proto.h" +int hyperv_x86_synic_add(X86CPU *cpu) +{ + hyperv_synic_add(CPU(cpu)); + return 0; +} + +void hyperv_x86_synic_reset(X86CPU *cpu) +{ + hyperv_synic_reset(CPU(cpu)); +} + +void hyperv_x86_synic_update(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + bool enable = env->msr_hv_synic_control & HV_SYNIC_ENABLE; + hwaddr msg_page_addr = (env->msr_hv_synic_msg_page & HV_SIMP_ENABLE) ? + (env->msr_hv_synic_msg_page & TARGET_PAGE_MASK) : 0; + hwaddr event_page_addr = (env->msr_hv_synic_evt_page & HV_SIEFP_ENABLE) ? 
+ (env->msr_hv_synic_evt_page & TARGET_PAGE_MASK) : 0; + hyperv_synic_update(CPU(cpu), enable, msg_page_addr, event_page_addr); +} + int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) { CPUX86State *env = &cpu->env; @@ -44,6 +66,9 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) default: return -1; } + + hyperv_x86_synic_update(cpu); + return 0; case KVM_EXIT_HYPERV_HCALL: { uint16_t code; diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h index f0a27c3d73..67543296c3 100644 --- a/target/i386/hyperv.h +++ b/target/i386/hyperv.h @@ -22,4 +22,8 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit); #endif +int hyperv_x86_synic_add(X86CPU *cpu); +void hyperv_x86_synic_reset(X86CPU *cpu); +void hyperv_x86_synic_update(X86CPU *cpu); + #endif diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 2e5b9f63eb..cf6270ae39 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -790,6 +790,13 @@ static int hyperv_init_vcpu(X86CPU *cpu) strerror(-ret)); return ret; } + + ret = hyperv_x86_synic_add(cpu); + if (ret < 0) { + error_report("failed to create HyperV SynIC: %s", + strerror(-ret)); + return ret; + } } return 0; @@ -1250,6 +1257,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) { env->msr_hv_synic_sint[i] = HV_SINT_MASKED; } + + hyperv_x86_synic_reset(cpu); } } diff --git a/target/i386/machine.c b/target/i386/machine.c index 084c2c73a8..225b5d433b 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -7,6 +7,7 @@ #include "hw/i386/pc.h" #include "hw/isa/isa.h" #include "migration/cpu.h" +#include "hyperv.h" #include "sysemu/kvm.h" @@ -672,11 +673,19 @@ static bool hyperv_synic_enable_needed(void *opaque) return false; } +static int hyperv_synic_post_load(void *opaque, int version_id) +{ + X86CPU *cpu = opaque; + hyperv_x86_synic_update(cpu); + return 0; +} + static const VMStateDescription vmstate_msr_hyperv_synic = { .name = "cpu/msr_hyperv_synic", 
.version_id = 1, .minimum_version_id = 1, .needed = hyperv_synic_enable_needed, + .post_load = hyperv_synic_post_load, .fields = (VMStateField[]) { VMSTATE_UINT64(env.msr_hv_synic_control, X86CPU), VMSTATE_UINT64(env.msr_hv_synic_evt_page, X86CPU), From 9b4cf107b09d18ac30f46fd1c4de8585ccba030c Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:10 +0300 Subject: [PATCH 38/47] hyperv: only add SynIC in compatible configurations Certain configurations do not allow SynIC to be used in QEMU. In particular, - when hyperv_vpindex is off, SINT routes can't be used as they refer to the destination vCPU by vp_index - older KVM (which doesn't expose KVM_CAP_HYPERV_SYNIC2) zeroes out SynIC message and event pages on every msr load, breaking migration OTOH in-KVM users of SynIC -- SynIC timers -- do work in those configurations, and we shouldn't stop the guest from using them. To cover both scenarios, introduce an X86CPU property that makes CPU init code to skip creation of the SynIC object (and thus disables any SynIC use in QEMU) but keeps the KVM part of the SynIC working. The property is clear by default but is set via compat logic for older machine types. As a result, when hv_synic and a modern machine type are specified, QEMU will refuse to run unless vp_index is on and the kernel is recent enough. OTOH with an older machine type QEMU will run fine with hv_synic=on against an older kernel and/or without vp_index enabled but will disallow the in-QEMU uses of SynIC (in e.g. VMBus). 
Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-4-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- include/hw/i386/pc.h | 8 ++++++++ target/i386/cpu.c | 2 ++ target/i386/cpu.h | 1 + target/i386/kvm.c | 30 ++++++++++++++++++++++-------- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 6894f37df1..dfe6746692 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -294,6 +294,14 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +#define PC_COMPAT_3_0 \ + HW_COMPAT_3_0 \ + {\ + .driver = TYPE_X86_CPU,\ + .property = "x-hv-synic-kvm-only",\ + .value = "on",\ + } + #define PC_COMPAT_2_12 \ HW_COMPAT_2_12 \ {\ diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 32ea041c06..f0d9f7cf49 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5607,6 +5607,8 @@ static Property x86_cpu_properties[] = { * to the specific Windows version being used." 
*/ DEFINE_PROP_INT32("x-hv-max-vps", X86CPU, hv_max_vps, -1), + DEFINE_PROP_BOOL("x-hv-synic-kvm-only", X86CPU, hyperv_synic_kvm_only, + false), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index caa1544b2e..663f3a5e67 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1378,6 +1378,7 @@ struct X86CPU { bool hyperv_vpindex; bool hyperv_runtime; bool hyperv_synic; + bool hyperv_synic_kvm_only; bool hyperv_stimer; bool hyperv_frequencies; bool hyperv_reenlightenment; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index cf6270ae39..7b7a56593e 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -734,8 +734,18 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_VP_RUNTIME_AVAILABLE; } if (cpu->hyperv_synic) { - if (!has_msr_hv_synic || - !kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_SYNIC)) { + unsigned int cap = KVM_CAP_HYPERV_SYNIC; + if (!cpu->hyperv_synic_kvm_only) { + if (!cpu->hyperv_vpindex) { + fprintf(stderr, "Hyper-V SynIC " + "(requested by 'hv-synic' cpu flag) " + "requires Hyper-V VP_INDEX ('hv-vpindex')\n"); + return -ENOSYS; + } + cap = KVM_CAP_HYPERV_SYNIC2; + } + + if (!has_msr_hv_synic || !kvm_check_extension(cs->kvm_state, cap)) { fprintf(stderr, "Hyper-V SynIC (requested by 'hv-synic' cpu flag) " "is not supported by kernel\n"); return -ENOSYS; @@ -784,18 +794,22 @@ static int hyperv_init_vcpu(X86CPU *cpu) } if (cpu->hyperv_synic) { - ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_SYNIC, 0); + uint32_t synic_cap = cpu->hyperv_synic_kvm_only ? 
+ KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2; + ret = kvm_vcpu_enable_cap(cs, synic_cap, 0); if (ret < 0) { error_report("failed to turn on HyperV SynIC in KVM: %s", strerror(-ret)); return ret; } - ret = hyperv_x86_synic_add(cpu); - if (ret < 0) { - error_report("failed to create HyperV SynIC: %s", - strerror(-ret)); - return ret; + if (!cpu->hyperv_synic_kvm_only) { + ret = hyperv_x86_synic_add(cpu); + if (ret < 0) { + error_report("failed to create HyperV SynIC: %s", + strerror(-ret)); + return ret; + } } } From 267e071bd6d675c15e7ffbf8aaf44d488ebd5c83 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:11 +0300 Subject: [PATCH 39/47] hyperv: make overlay pages for SynIC Per Hyper-V spec, SynIC message and event flag pages are to be implemented as so called overlay pages. That is, they are owned by the hypervisor and, when mapped into the guest physical address space, overlay the guest physical pages such that 1) the overlaid guest page becomes invisible to the guest CPUs until the overlay page is turned off 2) the contents of the overlay page is preserved when it's turned off and back on, even at a different address; it's only zeroed at vcpu reset This particular nature of SynIC message and event flag pages is ignored in the current code, and guest physical pages are used directly instead. This happens to (mostly) work because the actual guests seem not to depend on the features listed above. This patch implements those pages as the spec mandates. Since the extra RAM regions, which introduce migration incompatibility, are only added at SynIC object creation which only happens when hyperv_synic_kvm_only == false, no extra compat logic is necessary. 
Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-5-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/hyperv/hyperv.c | 51 +++++++++++++++++++++++++++++++++++++++++--- target/i386/hyperv.c | 20 +++++++++++------ 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index 3d6f044282..70cf129d04 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -10,6 +10,7 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "qapi/error.h" +#include "exec/address-spaces.h" #include "sysemu/kvm.h" #include "hw/hyperv/hyperv.h" @@ -21,6 +22,10 @@ typedef struct SynICState { bool enabled; hwaddr msg_page_addr; hwaddr event_page_addr; + MemoryRegion msg_page_mr; + MemoryRegion event_page_mr; + struct hyperv_message_page *msg_page; + struct hyperv_event_flags_page *event_page; } SynICState; #define TYPE_SYNIC "hyperv-synic" @@ -36,8 +41,28 @@ static void synic_update(SynICState *synic, bool enable, { synic->enabled = enable; - synic->msg_page_addr = msg_page_addr; - synic->event_page_addr = event_page_addr; + if (synic->msg_page_addr != msg_page_addr) { + if (synic->msg_page_addr) { + memory_region_del_subregion(get_system_memory(), + &synic->msg_page_mr); + } + if (msg_page_addr) { + memory_region_add_subregion(get_system_memory(), msg_page_addr, + &synic->msg_page_mr); + } + synic->msg_page_addr = msg_page_addr; + } + if (synic->event_page_addr != event_page_addr) { + if (synic->event_page_addr) { + memory_region_del_subregion(get_system_memory(), + &synic->event_page_mr); + } + if (event_page_addr) { + memory_region_add_subregion(get_system_memory(), event_page_addr, + &synic->event_page_mr); + } + synic->event_page_addr = event_page_addr; + } } void hyperv_synic_update(CPUState *cs, bool enable, @@ -54,11 +79,31 @@ void hyperv_synic_update(CPUState *cs, bool enable, static void synic_realize(DeviceState *dev, Error **errp) { -} + Object *obj = OBJECT(dev); + SynICState *synic = SYNIC(dev); + char 
*msgp_name, *eventp_name; + uint32_t vp_index; + /* memory region names have to be globally unique */ + vp_index = hyperv_vp_index(synic->cs); + msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index); + eventp_name = g_strdup_printf("synic-%u-event-page", vp_index); + + memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name, + sizeof(*synic->msg_page), &error_abort); + memory_region_init_ram(&synic->event_page_mr, obj, eventp_name, + sizeof(*synic->event_page), &error_abort); + synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr); + synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr); + + g_free(msgp_name); + g_free(eventp_name); +} static void synic_reset(DeviceState *dev) { SynICState *synic = SYNIC(dev); + memset(synic->msg_page, 0, sizeof(*synic->msg_page)); + memset(synic->event_page, 0, sizeof(*synic->event_page)); synic_update(synic, false, 0, 0); } diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 0216735d67..3f76c3e266 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -12,6 +12,7 @@ */ #include "qemu/osdep.h" +#include "qemu/main-loop.h" #include "hyperv.h" #include "hw/hyperv/hyperv.h" #include "hyperv-proto.h" @@ -38,6 +39,13 @@ void hyperv_x86_synic_update(X86CPU *cpu) hyperv_synic_update(CPU(cpu), enable, msg_page_addr, event_page_addr); } +static void async_synic_update(CPUState *cs, run_on_cpu_data data) +{ + qemu_mutex_lock_iothread(); + hyperv_x86_synic_update(X86_CPU(cs)); + qemu_mutex_unlock_iothread(); +} + int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) { CPUX86State *env = &cpu->env; @@ -48,11 +56,6 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) return -1; } - /* - * For now just track changes in SynIC control and msg/evt pages msr's. - * When SynIC messaging/events processing will be added in future - * here we will do messages queues flushing and pages remapping. 
- */ switch (exit->u.synic.msr) { case HV_X64_MSR_SCONTROL: env->msr_hv_synic_control = exit->u.synic.control; @@ -67,7 +70,12 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) return -1; } - hyperv_x86_synic_update(cpu); + /* + * this will run in this cpu thread before it returns to KVM, but in a + * safe environment (i.e. when all cpus are quiescent) -- this is + * necessary because memory hierarchy is being changed + */ + async_safe_run_on_cpu(CPU(cpu), async_synic_update, RUN_ON_CPU_NULL); return 0; case KVM_EXIT_HYPERV_HCALL: { From 4cbaf3c13300b79d0386b567630f8e9c91ac5099 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:12 +0300 Subject: [PATCH 40/47] hyperv: add synic message delivery Add infrastructure to deliver SynIC messages to the SynIC message page. Note that KVM may also want to deliver (SynIC timer) messages to the same message slot. The problem is that the access to a SynIC message slot is controlled by the value of its .msg_type field which indicates if the slot is being owned by the hypervisor (zero) or by the guest (non-zero). This leaves no room for synchronizing multiple concurrent producers. The simplest way to deal with this for both KVM and QEMU is to only deliver messages in the vcpu thread. KVM already does this; this patch makes it for QEMU, too. Specifically, - add a function for posting messages, which only copies the message into the staging buffer if it's free, and schedules work on the corresponding vcpu to actually deliver it to the guest slot; - instead of a sint ack callback, set up the sint route with a message status callback. This function is called in a bh whenever there are updates to the message slot status: either the vcpu made definitive progress delivering the message from the staging buffer (succeeded or failed) or the guest issued EOM; the status is passed as an argument to the callback.
Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-6-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/hyperv/hyperv.c | 162 ++++++++++++++++++++++++++++++++++--- include/hw/hyperv/hyperv.h | 18 ++++- 2 files changed, 166 insertions(+), 14 deletions(-) diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index 70cf129d04..654ca4ffc5 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -148,14 +148,51 @@ static void synic_register_types(void) type_init(synic_register_types) +/* + * KVM has its own message producers (SynIC timers). To guarantee + * serialization with both KVM vcpu and the guest cpu, the messages are first + * staged in an intermediate area and then posted to the SynIC message page in + * the vcpu thread. + */ +typedef struct HvSintStagedMessage { + /* message content staged by hyperv_post_msg */ + struct hyperv_message msg; + /* callback + data (r/o) to complete the processing in a BH */ + HvSintMsgCb cb; + void *cb_data; + /* message posting status filled by cpu_post_msg */ + int status; + /* passing the buck: */ + enum { + /* initial state */ + HV_STAGED_MSG_FREE, + /* + * hyperv_post_msg (e.g. 
in main loop) grabs the staged area (FREE -> + * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu + */ + HV_STAGED_MSG_BUSY, + /* + * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot, + * notify the guest, records the status, marks the posting done (BUSY + * -> POSTED), and schedules sint_msg_bh BH + */ + HV_STAGED_MSG_POSTED, + /* + * sint_msg_bh (BH) verifies that the posting is done, runs the + * callback, and starts over (POSTED -> FREE) + */ + } state; +} HvSintStagedMessage; + struct HvSintRoute { uint32_t sint; SynICState *synic; int gsi; EventNotifier sint_set_notifier; EventNotifier sint_ack_notifier; - HvSintAckClb sint_ack_clb; - void *sint_ack_clb_data; + + HvSintStagedMessage *staged_msg; + unsigned refcount; }; @@ -166,17 +203,115 @@ static CPUState *hyperv_find_vcpu(uint32_t vp_index) return cs; } -static void kvm_hv_sint_ack_handler(EventNotifier *notifier) +/* + * BH to complete the processing of a staged message. + */ +static void sint_msg_bh(void *opaque) +{ + HvSintRoute *sint_route = opaque; + HvSintStagedMessage *staged_msg = sint_route->staged_msg; + + if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) { + /* status nor ready yet (spurious ack from guest?), ignore */ + return; + } + + staged_msg->cb(staged_msg->cb_data, staged_msg->status); + staged_msg->status = 0; + + /* staged message processing finished, ready to start over */ + atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE); + /* drop the reference taken in hyperv_post_msg */ + hyperv_sint_route_unref(sint_route); +} + +/* + * Worker to transfer the message from the staging area into the SynIC message + * page in vcpu context. 
+ */ +static void cpu_post_msg(CPUState *cs, run_on_cpu_data data) +{ + HvSintRoute *sint_route = data.host_ptr; + HvSintStagedMessage *staged_msg = sint_route->staged_msg; + SynICState *synic = sint_route->synic; + struct hyperv_message *dst_msg; + bool wait_for_sint_ack = false; + + assert(staged_msg->state == HV_STAGED_MSG_BUSY); + + if (!synic->enabled || !synic->msg_page_addr) { + staged_msg->status = -ENXIO; + goto posted; + } + + dst_msg = &synic->msg_page->slot[sint_route->sint]; + + if (dst_msg->header.message_type != HV_MESSAGE_NONE) { + dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING; + staged_msg->status = -EAGAIN; + wait_for_sint_ack = true; + } else { + memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg)); + staged_msg->status = hyperv_sint_route_set_sint(sint_route); + } + + memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page)); + +posted: + atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED); + /* + * Notify the msg originator of the progress made; if the slot was busy we + * set msg_pending flag in it so it will be the guest who will do EOM and + * trigger the notification from KVM via sint_ack_notifier + */ + if (!wait_for_sint_ack) { + aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, + sint_route); + } +} + +/* + * Post a Hyper-V message to the staging area, for delivery to guest in the + * vcpu thread. 
+ */ +int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg) +{ + HvSintStagedMessage *staged_msg = sint_route->staged_msg; + + assert(staged_msg); + + /* grab the staging area */ + if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE, + HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) { + return -EAGAIN; + } + + memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg)); + + /* hold a reference on sint_route until the callback is finished */ + hyperv_sint_route_ref(sint_route); + + /* schedule message posting attempt in vcpu thread */ + async_run_on_cpu(sint_route->synic->cs, cpu_post_msg, + RUN_ON_CPU_HOST_PTR(sint_route)); + return 0; +} + +static void sint_ack_handler(EventNotifier *notifier) { HvSintRoute *sint_route = container_of(notifier, HvSintRoute, sint_ack_notifier); event_notifier_test_and_clear(notifier); - sint_route->sint_ack_clb(sint_route->sint_ack_clb_data); + + /* + * the guest consumed the previous message so complete the current one with + * -EAGAIN and let the msg originator retry + */ + aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route); } HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb, - void *sint_ack_clb_data) + HvSintMsgCb cb, void *cb_data) { HvSintRoute *sint_route; EventNotifier *ack_notifier; @@ -200,14 +335,19 @@ HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, goto err; } - ack_notifier = sint_ack_clb ? &sint_route->sint_ack_notifier : NULL; + + ack_notifier = cb ? 
&sint_route->sint_ack_notifier : NULL; if (ack_notifier) { + sint_route->staged_msg = g_new0(HvSintStagedMessage, 1); + sint_route->staged_msg->cb = cb; + sint_route->staged_msg->cb_data = cb_data; + r = event_notifier_init(ack_notifier, false); if (r) { goto err_sint_set_notifier; } - event_notifier_set_handler(ack_notifier, kvm_hv_sint_ack_handler); + event_notifier_set_handler(ack_notifier, sint_ack_handler); } gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); @@ -222,8 +362,6 @@ HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, goto err_irqfd; } sint_route->gsi = gsi; - sint_route->sint_ack_clb = sint_ack_clb; - sint_route->sint_ack_clb_data = sint_ack_clb_data; sint_route->synic = synic; sint_route->sint = sint; sint_route->refcount = 1; @@ -236,6 +374,7 @@ err_gsi: if (ack_notifier) { event_notifier_set_handler(ack_notifier, NULL); event_notifier_cleanup(ack_notifier); + g_free(sint_route->staged_msg); } err_sint_set_notifier: event_notifier_cleanup(&sint_route->sint_set_notifier); @@ -266,9 +405,10 @@ void hyperv_sint_route_unref(HvSintRoute *sint_route) &sint_route->sint_set_notifier, sint_route->gsi); kvm_irqchip_release_virq(kvm_state, sint_route->gsi); - if (sint_route->sint_ack_clb) { + if (sint_route->staged_msg) { event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); event_notifier_cleanup(&sint_route->sint_ack_notifier); + g_free(sint_route->staged_msg); } event_notifier_cleanup(&sint_route->sint_set_notifier); g_free(sint_route); diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index 6fba4762c8..82d561fc88 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -11,18 +11,30 @@ #define HW_HYPERV_HYPERV_H #include "cpu-qom.h" +#include "hw/hyperv/hyperv-proto.h" typedef struct HvSintRoute HvSintRoute; -typedef void (*HvSintAckClb)(void *data); + +/* + * Callback executed in a bottom-half when the status of posting the message + * becomes known, before unblocking the 
connection for further messages + */ +typedef void (*HvSintMsgCb)(void *data, int status); HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb, - void *sint_ack_clb_data); + HvSintMsgCb cb, void *cb_data); void hyperv_sint_route_ref(HvSintRoute *sint_route); void hyperv_sint_route_unref(HvSintRoute *sint_route); int hyperv_sint_route_set_sint(HvSintRoute *sint_route); +/* + * Submit a message to be posted in vcpu context. If the submission succeeds, + * the status of posting the message is reported via the callback associated + * with the @sint_route; until then no more messages are accepted. + */ +int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg); + static inline uint32_t hyperv_vp_index(CPUState *cs) { return cs->cpu_index; From f5642f8b458ba578c1ea94b9ad773e1e5c6cb615 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:13 +0300 Subject: [PATCH 41/47] hyperv: add synic event flag signaling Add infrastructure to signal SynIC event flags by atomically setting the corresponding bit in the event flags page and firing a SINT if necessary. Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-7-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/hyperv/hyperv.c | 32 ++++++++++++++++++++++++++++++++ include/hw/hyperv/hyperv.h | 4 ++++ 2 files changed, 36 insertions(+) diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index 654ca4ffc5..2b0e593bf9 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -12,6 +12,7 @@ #include "qapi/error.h" #include "exec/address-spaces.h" #include "sysemu/kvm.h" +#include "qemu/bitops.h" #include "hw/hyperv/hyperv.h" typedef struct SynICState { @@ -310,6 +311,37 @@ static void sint_ack_handler(EventNotifier *notifier) aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route); } +/* + * Set given event flag for a given sint on a given vcpu, and signal the sint. 
+ */ +int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno) +{ + int ret; + SynICState *synic = sint_route->synic; + unsigned long *flags, set_mask; + unsigned set_idx; + + if (eventno > HV_EVENT_FLAGS_COUNT) { + return -EINVAL; + } + if (!synic->enabled || !synic->event_page_addr) { + return -ENXIO; + } + + set_idx = BIT_WORD(eventno); + set_mask = BIT_MASK(eventno); + flags = synic->event_page->slot[sint_route->sint].flags; + + if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) { + memory_region_set_dirty(&synic->event_page_mr, 0, + sizeof(*synic->event_page)); + ret = hyperv_sint_route_set_sint(sint_route); + } else { + ret = 0; + } + return ret; +} + HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, HvSintMsgCb cb, void *cb_data) { diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index 82d561fc88..757c85eb8f 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -34,6 +34,10 @@ int hyperv_sint_route_set_sint(HvSintRoute *sint_route); * with the @sint_route; until then no more messages are accepted. */ int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg); +/* + * Set event flag @eventno, and signal the SINT if the flag has changed. + */ +int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno); static inline uint32_t hyperv_vp_index(CPUState *cs) { From e6ea9f45b72fe83d49adda948ff397dafc00c68f Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:14 +0300 Subject: [PATCH 42/47] hyperv: process SIGNAL_EVENT hypercall Add handling of SIGNAL_EVENT hypercall. For that, provide an interface to associate an EventNotifier with an event connection number, so that it's signaled when the SIGNAL_EVENT hypercall with the matching connection ID is called by the guest. Support for using KVM functionality for this will be added in a followup patch. 
Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-8-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/hyperv/hyperv.c | 93 ++++++++++++++++++++++++++++++++ include/hw/hyperv/hyperv-proto.h | 1 + include/hw/hyperv/hyperv.h | 13 +++++ target/i386/hyperv.c | 10 ++-- 4 files changed, 113 insertions(+), 4 deletions(-) diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index 2b0e593bf9..d745016c17 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -13,6 +13,9 @@ #include "exec/address-spaces.h" #include "sysemu/kvm.h" #include "qemu/bitops.h" +#include "qemu/queue.h" +#include "qemu/rcu.h" +#include "qemu/rcu_queue.h" #include "hw/hyperv/hyperv.h" typedef struct SynICState { @@ -450,3 +453,93 @@ int hyperv_sint_route_set_sint(HvSintRoute *sint_route) { return event_notifier_set(&sint_route->sint_set_notifier); } + +typedef struct EventFlagHandler { + struct rcu_head rcu; + QLIST_ENTRY(EventFlagHandler) link; + uint32_t conn_id; + EventNotifier *notifier; +} EventFlagHandler; + +static QLIST_HEAD(, EventFlagHandler) event_flag_handlers; +static QemuMutex handlers_mutex; + +static void __attribute__((constructor)) hv_init(void) +{ + QLIST_INIT(&event_flag_handlers); + qemu_mutex_init(&handlers_mutex); +} + +int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) +{ + int ret; + EventFlagHandler *handler; + + qemu_mutex_lock(&handlers_mutex); + QLIST_FOREACH(handler, &event_flag_handlers, link) { + if (handler->conn_id == conn_id) { + if (notifier) { + ret = -EEXIST; + } else { + QLIST_REMOVE_RCU(handler, link); + g_free_rcu(handler, rcu); + ret = 0; + } + goto unlock; + } + } + + if (notifier) { + handler = g_new(EventFlagHandler, 1); + handler->conn_id = conn_id; + handler->notifier = notifier; + QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link); + ret = 0; + } else { + ret = -ENOENT; + } +unlock: + qemu_mutex_unlock(&handlers_mutex); + return ret; +} + +uint16_t hyperv_hcall_signal_event(uint64_t param, bool 
fast) +{ + uint16_t ret; + EventFlagHandler *handler; + + if (unlikely(!fast)) { + hwaddr addr = param; + + if (addr & (__alignof__(addr) - 1)) { + return HV_STATUS_INVALID_ALIGNMENT; + } + + param = ldq_phys(&address_space_memory, addr); + } + + /* + * Per spec, bits 32-47 contain the extra "flag number". However, we + * have no use for it, and in all known usecases it is zero, so just + * report lookup failure if it isn't. + */ + if (param & 0xffff00000000ULL) { + return HV_STATUS_INVALID_PORT_ID; + } + /* remaining bits are reserved-zero */ + if (param & ~HV_CONNECTION_ID_MASK) { + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } + + ret = HV_STATUS_INVALID_CONNECTION_ID; + rcu_read_lock(); + QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) { + if (handler->conn_id == param) { + event_notifier_set(handler->notifier); + ret = 0; + break; + } + } + rcu_read_unlock(); + return ret; +} diff --git a/include/hw/hyperv/hyperv-proto.h b/include/hw/hyperv/hyperv-proto.h index 2dc78eeafb..21dc28aee9 100644 --- a/include/hw/hyperv/hyperv-proto.h +++ b/include/hw/hyperv/hyperv-proto.h @@ -21,6 +21,7 @@ #define HV_STATUS_INVALID_ALIGNMENT 4 #define HV_STATUS_INVALID_PARAMETER 5 #define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_PORT_ID 17 #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index 757c85eb8f..df92ed7e66 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -39,6 +39,19 @@ int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg); */ int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno); +/* + * Associate @notifier with the event connection @conn_id, such that @notifier + * is signaled when the guest executes HV_SIGNAL_EVENT hypercall on @conn_id. + * If @notifier is NULL clear the association. 
+ */ +int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier); + +/* + * Process HV_SIGNAL_EVENT hypercall: signal the EventNotifier associated with + * the connection as specified in @param. + */ +uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast); + static inline uint32_t hyperv_vp_index(CPUState *cs) { return cs->cpu_index; diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 3f76c3e266..96b3b5ad7f 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -79,16 +79,18 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) return 0; case KVM_EXIT_HYPERV_HCALL: { - uint16_t code; + uint16_t code = exit->u.hcall.input & 0xffff; + bool fast = exit->u.hcall.input & HV_HYPERCALL_FAST; + uint64_t param = exit->u.hcall.params[0]; - code = exit->u.hcall.input & 0xffff; switch (code) { - case HV_POST_MESSAGE: case HV_SIGNAL_EVENT: + exit->u.hcall.result = hyperv_hcall_signal_event(param, fast); + break; default: exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE; - return 0; } + return 0; } default: return -1; From 8d3bc0b75318dce70928b8bde1a49e632adf5137 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:15 +0300 Subject: [PATCH 43/47] hyperv: add support for KVM_HYPERV_EVENTFD When setting up a notifier for Hyper-V event connection, try to use the KVM-assisted one first, and fall back to userspace handling of the hypercall if the kernel doesn't provide the requested feature. 
Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-9-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/hyperv/hyperv.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index d745016c17..7a1c1bbee4 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -13,6 +13,7 @@ #include "exec/address-spaces.h" #include "sysemu/kvm.h" #include "qemu/bitops.h" +#include "qemu/error-report.h" #include "qemu/queue.h" #include "qemu/rcu.h" #include "qemu/rcu_queue.h" @@ -470,7 +471,7 @@ static void __attribute__((constructor)) hv_init(void) qemu_mutex_init(&handlers_mutex); } -int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) +static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) { int ret; EventFlagHandler *handler; @@ -503,6 +504,30 @@ unlock: return ret; } +static bool process_event_flags_userspace; + +int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) +{ + if (!process_event_flags_userspace && + !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) { + process_event_flags_userspace = true; + + warn_report("Hyper-V event signaling is not supported by this kernel; " + "using slower userspace hypercall processing"); + } + + if (!process_event_flags_userspace) { + struct kvm_hyperv_eventfd hvevfd = { + .conn_id = conn_id, + .fd = notifier ? event_notifier_get_fd(notifier) : -1, + .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN, + }; + + return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd); + } + return set_event_flag_handler(conn_id, notifier); +} + uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast) { uint16_t ret; From 76036a5fc7ca632f805748aeef416355b1d212a3 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:16 +0300 Subject: [PATCH 44/47] hyperv: process POST_MESSAGE hypercall Add handling of POST_MESSAGE hypercall. 
For that, add an interface to register a handler for messages arriving from the guest on a particular connection id (IOW set up a message connection in Hyper-V speak). Signed-off-by: Roman Kagan Message-Id: <20180921082217.29481-10-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- hw/hyperv/hyperv.c | 84 ++++++++++++++++++++++++++++++++++++++ include/hw/hyperv/hyperv.h | 18 ++++++++ target/i386/hyperv.c | 3 ++ 3 files changed, 105 insertions(+) diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c index 7a1c1bbee4..a28e7249d8 100644 --- a/hw/hyperv/hyperv.c +++ b/hw/hyperv/hyperv.c @@ -455,6 +455,14 @@ int hyperv_sint_route_set_sint(HvSintRoute *sint_route) return event_notifier_set(&sint_route->sint_set_notifier); } +typedef struct MsgHandler { + struct rcu_head rcu; + QLIST_ENTRY(MsgHandler) link; + uint32_t conn_id; + HvMsgHandler handler; + void *data; +} MsgHandler; + typedef struct EventFlagHandler { struct rcu_head rcu; QLIST_ENTRY(EventFlagHandler) link; @@ -462,15 +470,91 @@ typedef struct EventFlagHandler { EventNotifier *notifier; } EventFlagHandler; +static QLIST_HEAD(, MsgHandler) msg_handlers; static QLIST_HEAD(, EventFlagHandler) event_flag_handlers; static QemuMutex handlers_mutex; static void __attribute__((constructor)) hv_init(void) { + QLIST_INIT(&msg_handlers); QLIST_INIT(&event_flag_handlers); qemu_mutex_init(&handlers_mutex); } +int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data) +{ + int ret; + MsgHandler *mh; + + qemu_mutex_lock(&handlers_mutex); + QLIST_FOREACH(mh, &msg_handlers, link) { + if (mh->conn_id == conn_id) { + if (handler) { + ret = -EEXIST; + } else { + QLIST_REMOVE_RCU(mh, link); + g_free_rcu(mh, rcu); + ret = 0; + } + goto unlock; + } + } + + if (handler) { + mh = g_new(MsgHandler, 1); + mh->conn_id = conn_id; + mh->handler = handler; + mh->data = data; + QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link); + ret = 0; + } else { + ret = -ENOENT; + } +unlock: + qemu_mutex_unlock(&handlers_mutex); + 
return ret; +} + +uint16_t hyperv_hcall_post_message(uint64_t param, bool fast) +{ + uint16_t ret; + hwaddr len; + struct hyperv_post_message_input *msg; + MsgHandler *mh; + + if (fast) { + return HV_STATUS_INVALID_HYPERCALL_CODE; + } + if (param & (__alignof__(*msg) - 1)) { + return HV_STATUS_INVALID_ALIGNMENT; + } + + len = sizeof(*msg); + msg = cpu_physical_memory_map(param, &len, 0); + if (len < sizeof(*msg)) { + ret = HV_STATUS_INSUFFICIENT_MEMORY; + goto unmap; + } + if (msg->payload_size > sizeof(msg->payload)) { + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; + goto unmap; + } + + ret = HV_STATUS_INVALID_CONNECTION_ID; + rcu_read_lock(); + QLIST_FOREACH_RCU(mh, &msg_handlers, link) { + if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) { + ret = mh->handler(msg, mh->data); + break; + } + } + rcu_read_unlock(); + +unmap: + cpu_physical_memory_unmap(msg, len, 0, 0); + return ret; +} + static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) { int ret; diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h index df92ed7e66..597381cb01 100644 --- a/include/hw/hyperv/hyperv.h +++ b/include/hw/hyperv/hyperv.h @@ -39,6 +39,18 @@ int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg); */ int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno); +/* + * Handler for messages arriving from the guest via HV_POST_MESSAGE hypercall. + * Executed in vcpu context. + */ +typedef uint16_t (*HvMsgHandler)(const struct hyperv_post_message_input *msg, + void *data); +/* + * Associate @handler with the message connection @conn_id, such that @handler + * is called with @data when the guest executes HV_POST_MESSAGE hypercall on + * @conn_id. If @handler is NULL clear the association. 
+ */ +int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data); /* * Associate @notifier with the event connection @conn_id, such that @notifier * is signaled when the guest executes HV_SIGNAL_EVENT hypercall on @conn_id. @@ -46,6 +58,12 @@ int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno); */ int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier); +/* + * Process HV_POST_MESSAGE hypercall: parse the data in the guest memory as + * specified in @param, and call the HvMsgHandler associated with the + * connection on the message contained therein. + */ +uint16_t hyperv_hcall_post_message(uint64_t param, bool fast); /* * Process HV_SIGNAL_EVENT hypercall: signal the EventNotifier associated with * the connection as specified in @param. diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 96b3b5ad7f..b264a28620 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -84,6 +84,9 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) uint64_t param = exit->u.hcall.params[0]; switch (code) { + case HV_POST_MESSAGE: + exit->u.hcall.result = hyperv_hcall_post_message(param, fast); + break; case HV_SIGNAL_EVENT: exit->u.hcall.result = hyperv_hcall_signal_event(param, fast); break; From 6738ccfff0043b9fd6ecc823884c6f114a35a3c5 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 21 Sep 2018 11:22:17 +0300 Subject: [PATCH 45/47] hyperv_testdev: add SynIC message and event testmodes Add testmodes for SynIC messages and events. The message or event connection setup / teardown is initiated by the guest via new control codes written to the test device port. Then the test connections bounce the respective operations back to the guest, i.e. the incoming messages are posted or the incoming events are signaled on the configured vCPUs. 
Signed-off-by: Roman Kagan Signed-off-by: Paolo Bonzini --- hw/hyperv/hyperv_testdev.c | 165 ++++++++++++++++++++++++++++++++++++- 1 file changed, 164 insertions(+), 1 deletion(-) diff --git a/hw/hyperv/hyperv_testdev.c b/hw/hyperv/hyperv_testdev.c index fc3f6c5666..4880333cf5 100644 --- a/hw/hyperv/hyperv_testdev.c +++ b/hw/hyperv/hyperv_testdev.c @@ -12,6 +12,7 @@ */ #include "qemu/osdep.h" +#include "qemu/main-loop.h" #include "qemu/queue.h" #include "hw/qdev.h" #include "hw/isa/isa.h" @@ -24,10 +25,26 @@ typedef struct TestSintRoute { HvSintRoute *sint_route; } TestSintRoute; +typedef struct TestMsgConn { + QLIST_ENTRY(TestMsgConn) le; + uint8_t conn_id; + HvSintRoute *sint_route; + struct hyperv_message msg; +} TestMsgConn; + +typedef struct TestEvtConn { + QLIST_ENTRY(TestEvtConn) le; + uint8_t conn_id; + HvSintRoute *sint_route; + EventNotifier notifier; +} TestEvtConn; + struct HypervTestDev { ISADevice parent_obj; MemoryRegion sint_control; QLIST_HEAD(, TestSintRoute) sint_routes; + QLIST_HEAD(, TestMsgConn) msg_conns; + QLIST_HEAD(, TestEvtConn) evt_conns; }; typedef struct HypervTestDev HypervTestDev; @@ -38,7 +55,11 @@ typedef struct HypervTestDev HypervTestDev; enum { HV_TEST_DEV_SINT_ROUTE_CREATE = 1, HV_TEST_DEV_SINT_ROUTE_DESTROY, - HV_TEST_DEV_SINT_ROUTE_SET_SINT + HV_TEST_DEV_SINT_ROUTE_SET_SINT, + HV_TEST_DEV_MSG_CONN_CREATE, + HV_TEST_DEV_MSG_CONN_DESTROY, + HV_TEST_DEV_EVT_CONN_CREATE, + HV_TEST_DEV_EVT_CONN_DESTROY, }; static void sint_route_create(HypervTestDev *dev, @@ -93,6 +114,133 @@ static void sint_route_set_sint(HypervTestDev *dev, hyperv_sint_route_set_sint(sint_route->sint_route); } +static void msg_retry(void *opaque) +{ + TestMsgConn *conn = opaque; + assert(!hyperv_post_msg(conn->sint_route, &conn->msg)); +} + +static void msg_cb(void *data, int status) +{ + TestMsgConn *conn = data; + + if (!status) { + return; + } + + assert(status == -EAGAIN); + + aio_bh_schedule_oneshot(qemu_get_aio_context(), msg_retry, conn); +} + +static 
uint16_t msg_handler(const struct hyperv_post_message_input *msg, + void *data) +{ + int ret; + TestMsgConn *conn = data; + + /* post the same message we've got */ + conn->msg.header.message_type = msg->message_type; + assert(msg->payload_size < sizeof(conn->msg.payload)); + conn->msg.header.payload_size = msg->payload_size; + memcpy(&conn->msg.payload, msg->payload, msg->payload_size); + + ret = hyperv_post_msg(conn->sint_route, &conn->msg); + + switch (ret) { + case 0: + return HV_STATUS_SUCCESS; + case -EAGAIN: + return HV_STATUS_INSUFFICIENT_BUFFERS; + default: + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } +} + +static void msg_conn_create(HypervTestDev *dev, uint8_t vp_index, + uint8_t sint, uint8_t conn_id) +{ + TestMsgConn *conn; + + conn = g_new0(TestMsgConn, 1); + assert(conn); + + conn->conn_id = conn_id; + + conn->sint_route = hyperv_sint_route_new(vp_index, sint, msg_cb, conn); + assert(conn->sint_route); + + assert(!hyperv_set_msg_handler(conn->conn_id, msg_handler, conn)); + + QLIST_INSERT_HEAD(&dev->msg_conns, conn, le); +} + +static void msg_conn_destroy(HypervTestDev *dev, uint8_t conn_id) +{ + TestMsgConn *conn; + + QLIST_FOREACH(conn, &dev->msg_conns, le) { + if (conn->conn_id == conn_id) { + QLIST_REMOVE(conn, le); + hyperv_set_msg_handler(conn->conn_id, NULL, NULL); + hyperv_sint_route_unref(conn->sint_route); + g_free(conn); + return; + } + } + assert(false); +} + +static void evt_conn_handler(EventNotifier *notifier) +{ + TestEvtConn *conn = container_of(notifier, TestEvtConn, notifier); + + event_notifier_test_and_clear(notifier); + + /* signal the same event flag we've got */ + assert(!hyperv_set_event_flag(conn->sint_route, conn->conn_id)); +} + +static void evt_conn_create(HypervTestDev *dev, uint8_t vp_index, + uint8_t sint, uint8_t conn_id) +{ + TestEvtConn *conn; + + conn = g_new0(TestEvtConn, 1); + assert(conn); + + conn->conn_id = conn_id; + + conn->sint_route = hyperv_sint_route_new(vp_index, sint, NULL, NULL); + 
assert(conn->sint_route); + + assert(!event_notifier_init(&conn->notifier, false)); + + event_notifier_set_handler(&conn->notifier, evt_conn_handler); + + assert(!hyperv_set_event_flag_handler(conn_id, &conn->notifier)); + + QLIST_INSERT_HEAD(&dev->evt_conns, conn, le); +} + +static void evt_conn_destroy(HypervTestDev *dev, uint8_t conn_id) +{ + TestEvtConn *conn; + + QLIST_FOREACH(conn, &dev->evt_conns, le) { + if (conn->conn_id == conn_id) { + QLIST_REMOVE(conn, le); + hyperv_set_event_flag_handler(conn->conn_id, NULL); + event_notifier_set_handler(&conn->notifier, NULL); + event_notifier_cleanup(&conn->notifier); + hyperv_sint_route_unref(conn->sint_route); + g_free(conn); + return; + } + } + assert(false); +} + static uint64_t hv_test_dev_read(void *opaque, hwaddr addr, unsigned size) { return 0; @@ -105,6 +253,7 @@ static void hv_test_dev_write(void *opaque, hwaddr addr, uint64_t data, uint8_t sint = data & 0xFF; uint8_t vp_index = (data >> 8ULL) & 0xFF; uint8_t ctl = (data >> 16ULL) & 0xFF; + uint8_t conn_id = (data >> 24ULL) & 0xFF; switch (ctl) { case HV_TEST_DEV_SINT_ROUTE_CREATE: @@ -116,6 +265,18 @@ static void hv_test_dev_write(void *opaque, hwaddr addr, uint64_t data, case HV_TEST_DEV_SINT_ROUTE_SET_SINT: sint_route_set_sint(dev, vp_index, sint); break; + case HV_TEST_DEV_MSG_CONN_CREATE: + msg_conn_create(dev, vp_index, sint, conn_id); + break; + case HV_TEST_DEV_MSG_CONN_DESTROY: + msg_conn_destroy(dev, conn_id); + break; + case HV_TEST_DEV_EVT_CONN_CREATE: + evt_conn_create(dev, vp_index, sint, conn_id); + break; + case HV_TEST_DEV_EVT_CONN_DESTROY: + evt_conn_destroy(dev, conn_id); + break; default: break; } @@ -136,6 +297,8 @@ static void hv_test_dev_realizefn(DeviceState *d, Error **errp) MemoryRegion *io = isa_address_space_io(isa); QLIST_INIT(&dev->sint_routes); + QLIST_INIT(&dev->msg_conns); + QLIST_INIT(&dev->evt_conns); memory_region_init_io(&dev->sint_control, OBJECT(dev), &synic_test_sint_ops, dev, "hyperv-testdev-ctl", 4); From 
0c2ed83fa45aa5d80ecc7d3fff0ab38db2db5972 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Sat, 6 Oct 2018 02:18:16 -0700 Subject: [PATCH 46/47] target/i386: kvm: just return after migrate_add_blocker failed When migrate_add_blocker fails, invtsc_mig_blocker has not been appended, so there is no need to remove it. This saves several instructions. Signed-off-by: Li Qiang Message-Id: <20181006091816.7659-1-liq3ea@163.com> Signed-off-by: Paolo Bonzini --- target/i386/kvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 7b7a56593e..115d8b4c14 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1198,7 +1198,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (local_err) { error_report_err(local_err); error_free(invtsc_mig_blocker); - goto fail; + return r; } /* for savevm */ vmstate_x86_cpu.unmigratable = 1; From 74c0b816adfc6aa1b01b4426fdf385e32e35cbac Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Oct 2018 13:24:14 +0200 Subject: [PATCH 47/47] replay: pass raw icount value to replay_save_clock This avoids lock recursion when REPLAY_CLOCK is called inside the timers spinlock.
Signed-off-by: Paolo Bonzini --- cpus.c | 4 ++-- include/sysemu/replay.h | 10 ++++++++-- replay/replay-internal.c | 25 +++++++++++++++---------- replay/replay-internal.h | 2 ++ replay/replay-time.c | 8 +++++--- stubs/cpu-get-icount.c | 5 +++++ stubs/replay.c | 2 +- 7 files changed, 38 insertions(+), 18 deletions(-) diff --git a/cpus.c b/cpus.c index 361678e459..1c741bceb5 100644 --- a/cpus.c +++ b/cpus.c @@ -509,8 +509,8 @@ static void icount_warp_rt(void) seqlock_write_lock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); if (runstate_is_running()) { - int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, - cpu_get_clock_locked()); + int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, + cpu_get_clock_locked()); int64_t warp_delta; warp_delta = clock - timers_state.vm_clock_warp_start; diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index 7f7a594eca..3a7c58e423 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -100,14 +100,20 @@ bool replay_has_interrupt(void); /* Processing clocks and other time sources */ /*! Save the specified clock */ -int64_t replay_save_clock(ReplayClockKind kind, int64_t clock); +int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, + int64_t raw_icount); /*! Read the specified clock from the log or return cached data */ int64_t replay_read_clock(ReplayClockKind kind); /*! Saves or reads the clock depending on the current replay mode. */ #define REPLAY_CLOCK(clock, value) \ (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock)) \ : replay_mode == REPLAY_MODE_RECORD \ - ? replay_save_clock((clock), (value)) \ + ? replay_save_clock((clock), (value), cpu_get_icount_raw()) \ + : (value)) +#define REPLAY_CLOCK_LOCKED(clock, value) \ + (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock)) \ + : replay_mode == REPLAY_MODE_RECORD \ + ? 
replay_save_clock((clock), (value), cpu_get_icount_raw_locked()) \ : (value)) /* Events */ diff --git a/replay/replay-internal.c b/replay/replay-internal.c index b077cb5fd5..1cea1d4dc9 100644 --- a/replay/replay-internal.c +++ b/replay/replay-internal.c @@ -217,20 +217,25 @@ void replay_mutex_unlock(void) } } +void replay_advance_current_step(uint64_t current_step) +{ + int diff = (int)(replay_get_current_step() - replay_state.current_step); + + /* Time can only go forward */ + assert(diff >= 0); + + if (diff > 0) { + replay_put_event(EVENT_INSTRUCTION); + replay_put_dword(diff); + replay_state.current_step += diff; + } +} + /*! Saves cached instructions. */ void replay_save_instructions(void) { if (replay_file && replay_mode == REPLAY_MODE_RECORD) { g_assert(replay_mutex_locked()); - int diff = (int)(replay_get_current_step() - replay_state.current_step); - - /* Time can only go forward */ - assert(diff >= 0); - - if (diff > 0) { - replay_put_event(EVENT_INSTRUCTION); - replay_put_dword(diff); - replay_state.current_step += diff; - } + replay_advance_current_step(replay_get_current_step()); } } diff --git a/replay/replay-internal.h b/replay/replay-internal.h index 9b0fd916a3..af6f4d55d4 100644 --- a/replay/replay-internal.h +++ b/replay/replay-internal.h @@ -122,6 +122,8 @@ void replay_finish_event(void); data_kind variable. */ void replay_fetch_data_kind(void); +/*! Advance replay_state.current_step to the specified value. */ +void replay_advance_current_step(uint64_t current_step); /*! Saves queued events (like instructions and sound). 
*/ void replay_save_instructions(void); diff --git a/replay/replay-time.c b/replay/replay-time.c index 6a7565ec8d..0df1693337 100644 --- a/replay/replay-time.c +++ b/replay/replay-time.c @@ -15,13 +15,15 @@ #include "replay-internal.h" #include "qemu/error-report.h" -int64_t replay_save_clock(ReplayClockKind kind, int64_t clock) +int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, int64_t raw_icount) { - if (replay_file) { g_assert(replay_mutex_locked()); - replay_save_instructions(); + /* Due to the caller's locking requirements we get the icount from it + * instead of using replay_save_instructions(). + */ + replay_advance_current_step(raw_icount); replay_put_event(EVENT_CLOCK + kind); replay_put_qword(clock); } diff --git a/stubs/cpu-get-icount.c b/stubs/cpu-get-icount.c index 0b7239d721..35f0c1e24c 100644 --- a/stubs/cpu-get-icount.c +++ b/stubs/cpu-get-icount.c @@ -11,6 +11,11 @@ int64_t cpu_get_icount(void) abort(); } +int64_t cpu_get_icount_raw(void) +{ + abort(); +} + void qemu_timer_notify_cb(void *opaque, QEMUClockType type) { qemu_notify_event(); diff --git a/stubs/replay.c b/stubs/replay.c index 04279abb2c..4ac607895d 100644 --- a/stubs/replay.c +++ b/stubs/replay.c @@ -4,7 +4,7 @@ ReplayMode replay_mode; -int64_t replay_save_clock(unsigned int kind, int64_t clock) +int64_t replay_save_clock(unsigned int kind, int64_t clock, int64_t raw_icount) { abort(); return 0;