diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index de12f78eb8..4880a05399 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -79,6 +79,7 @@ struct KVMState int fd; int vmfd; int coalesced_mmio; + int coalesced_pio; struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; bool coalesced_flush_in_progress; int vcpu_events; @@ -560,6 +561,45 @@ static void kvm_uncoalesce_mmio_region(MemoryListener *listener, } } +static void kvm_coalesce_pio_add(MemoryListener *listener, + MemoryRegionSection *section, + hwaddr start, hwaddr size) +{ + KVMState *s = kvm_state; + + if (s->coalesced_pio) { + struct kvm_coalesced_mmio_zone zone; + + zone.addr = start; + zone.size = size; + zone.pio = 1; + + (void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone); + } +} + +static void kvm_coalesce_pio_del(MemoryListener *listener, + MemoryRegionSection *section, + hwaddr start, hwaddr size) +{ + KVMState *s = kvm_state; + + if (s->coalesced_pio) { + struct kvm_coalesced_mmio_zone zone; + + zone.addr = start; + zone.size = size; + zone.pio = 1; + + (void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone); + } +} + +static MemoryListener kvm_coalesced_pio_listener = { + .coalesced_io_add = kvm_coalesce_pio_add, + .coalesced_io_del = kvm_coalesce_pio_del, +}; + int kvm_check_extension(KVMState *s, unsigned int extension) { int ret; @@ -1616,6 +1656,8 @@ static int kvm_init(MachineState *ms) } s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO); + s->coalesced_pio = s->coalesced_mmio && + kvm_check_extension(s, KVM_CAP_COALESCED_PIO); #ifdef KVM_CAP_VCPU_EVENTS s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS); @@ -1686,13 +1728,15 @@ static int kvm_init(MachineState *ms) s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add; s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del; } - s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region; - s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region; + s->memory_listener.listener.coalesced_io_add = kvm_coalesce_mmio_region; + s->memory_listener.listener.coalesced_io_del = kvm_uncoalesce_mmio_region; kvm_memory_listener_register(s, &s->memory_listener, &address_space_memory, 0); memory_listener_register(&kvm_io_listener, &address_space_io); + memory_listener_register(&kvm_coalesced_pio_listener, + &address_space_io); s->many_ioeventfds = kvm_check_many_ioeventfds(); @@ -1778,7 +1822,13 @@ void kvm_flush_coalesced_mmio_buffer(void) ent = &ring->coalesced_mmio[ring->first]; - cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len); + if (ent->pio == 1) { + address_space_rw(&address_space_io, ent->phys_addr, + MEMTXATTRS_UNSPECIFIED, ent->data, + ent->len, true); + } else { + cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len); + } smp_wmb(); ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX; } diff --git a/cpus.c b/cpus.c index cce64874e6..bb2a511483 100644 --- a/cpus.c +++ b/cpus.c @@ -509,8 +509,8 @@ static void icount_warp_rt(void) seqlock_write_lock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); if (runstate_is_running()) { - int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT, - cpu_get_clock_locked()); + int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, + cpu_get_clock_locked()); int64_t warp_delta; warp_delta = clock - timers_state.vm_clock_warp_start; diff --git a/default-configs/hyperv.mak b/default-configs/hyperv.mak new file mode 100644 index 0000000000..5d0d9fd830 --- /dev/null +++ b/default-configs/hyperv.mak @@ -0,0 +1,2 @@ +CONFIG_HYPERV=$(CONFIG_KVM) +CONFIG_HYPERV_TESTDEV=y diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 8c7d4a0fa0..210cff2781 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -3,6 +3,7 @@ include pci.mak include sound.mak include usb.mak +include hyperv.mak CONFIG_QXL=$(CONFIG_SPICE) CONFIG_VGA_ISA=y CONFIG_VGA_CIRRUS=y @@ -58,7 +59,6 @@ CONFIG_XIO3130=y CONFIG_IOH3420=y CONFIG_I82801B11=y CONFIG_SMBIOS=y -CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM) CONFIG_PXB=y CONFIG_ACPI_VMGENID=y CONFIG_FW_CFG_DMA=y diff --git a/hw/Makefile.objs b/hw/Makefile.objs index a19c1417ed..30722ccf98 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -9,6 +9,7 @@ devices-dirs-$(CONFIG_SOFTMMU) += cpu/ devices-dirs-$(CONFIG_SOFTMMU) += display/ devices-dirs-$(CONFIG_SOFTMMU) += dma/ devices-dirs-$(CONFIG_SOFTMMU) += gpio/ +devices-dirs-$(CONFIG_HYPERV) += hyperv/ devices-dirs-$(CONFIG_SOFTMMU) += i2c/ devices-dirs-$(CONFIG_SOFTMMU) += ide/ devices-dirs-$(CONFIG_SOFTMMU) += input/ diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c index 4f980a598b..97789a0771 100644 --- a/hw/audio/es1370.c +++ b/hw/audio/es1370.c @@ -585,10 +585,13 @@ static uint64_t es1370_read(void *opaque, hwaddr addr, unsigned size) #endif break; + case ES1370_REG_ADC_FRAMECNT: + d += 2; + goto framecnt; case ES1370_REG_DAC1_FRAMECNT: case ES1370_REG_DAC2_FRAMECNT: - case ES1370_REG_ADC_FRAMECNT: d += (addr - ES1370_REG_DAC1_FRAMECNT) >> 3; + framecnt: val = d->frame_cnt; #ifdef DEBUG_ES1370 { @@ -602,10 +605,13 @@ static uint64_t es1370_read(void *opaque, hwaddr addr, unsigned size) #endif break; + case ES1370_REG_ADC_FRAMEADR: + d += 2; + goto frameadr; case ES1370_REG_DAC1_FRAMEADR: case ES1370_REG_DAC2_FRAMEADR: - case ES1370_REG_ADC_FRAMEADR: d += (addr - ES1370_REG_DAC1_FRAMEADR) >> 3; + frameadr: val = d->frame_addr; break; diff --git a/hw/core/hotplug.c b/hw/core/hotplug.c index 2253072d0e..17ac986685 100644 --- a/hw/core/hotplug.c +++ b/hw/core/hotplug.c @@ -35,16 +35,6 @@ void hotplug_handler_plug(HotplugHandler *plug_handler, } } -void hotplug_handler_post_plug(HotplugHandler *plug_handler, - DeviceState *plugged_dev) -{ - HotplugHandlerClass *hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler); - - if (hdc->post_plug) { - hdc->post_plug(plug_handler, plugged_dev); - } -} - void hotplug_handler_unplug_request(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp) diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 046d8f1f76..6b3cc55b27 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -832,14 +832,6 @@ static void device_set_realized(Object *obj, bool value, Error **errp) DEVICE_LISTENER_CALL(realize, Forward, dev); - if (hotplug_ctrl) { - hotplug_handler_plug(hotplug_ctrl, dev, &local_err); - } - - if (local_err != NULL) { - goto post_realize_fail; - } - /* * always free/re-initialize here since the value cannot be cleaned up * in device_unrealize due to its usage later on in the unplug path @@ -869,8 +861,12 @@ static void device_set_realized(Object *obj, bool value, Error **errp) dev->pending_deleted_event = false; if (hotplug_ctrl) { - hotplug_handler_post_plug(hotplug_ctrl, dev); - } + hotplug_handler_plug(hotplug_ctrl, dev, &local_err); + if (local_err != NULL) { + goto child_realize_fail; + } + } + } else if (!value && dev->realized) { Error **local_errp = NULL; QLIST_FOREACH(bus, &dev->child_bus, sibling) { diff --git a/hw/hyperv/Makefile.objs b/hw/hyperv/Makefile.objs new file mode 100644 index 0000000000..edaca2f763 --- /dev/null +++ b/hw/hyperv/Makefile.objs @@ -0,0 +1,2 @@ +obj-y += hyperv.o +obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o diff --git a/hw/hyperv/hyperv.c b/hw/hyperv/hyperv.c new file mode 100644 index 0000000000..a28e7249d8 --- /dev/null +++ b/hw/hyperv/hyperv.c @@ -0,0 +1,654 @@ +/* + * Hyper-V guest/hypervisor interaction + * + * Copyright (c) 2015-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "qapi/error.h" +#include "exec/address-spaces.h" +#include "sysemu/kvm.h" +#include "qemu/bitops.h" +#include "qemu/error-report.h" +#include "qemu/queue.h" +#include "qemu/rcu.h" +#include "qemu/rcu_queue.h" +#include "hw/hyperv/hyperv.h" + +typedef struct SynICState { + DeviceState parent_obj; + + CPUState *cs; + + bool enabled; + hwaddr msg_page_addr; + hwaddr event_page_addr; + MemoryRegion msg_page_mr; + MemoryRegion event_page_mr; + struct hyperv_message_page *msg_page; + struct hyperv_event_flags_page *event_page; +} SynICState; + +#define TYPE_SYNIC "hyperv-synic" +#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC) + +static SynICState *get_synic(CPUState *cs) +{ + return SYNIC(object_resolve_path_component(OBJECT(cs), "synic")); +} + +static void synic_update(SynICState *synic, bool enable, + hwaddr msg_page_addr, hwaddr event_page_addr) +{ + + synic->enabled = enable; + if (synic->msg_page_addr != msg_page_addr) { + if (synic->msg_page_addr) { + memory_region_del_subregion(get_system_memory(), + &synic->msg_page_mr); + } + if (msg_page_addr) { + memory_region_add_subregion(get_system_memory(), msg_page_addr, + &synic->msg_page_mr); + } + synic->msg_page_addr = msg_page_addr; + } + if (synic->event_page_addr != event_page_addr) { + if (synic->event_page_addr) { + memory_region_del_subregion(get_system_memory(), + &synic->event_page_mr); + } + if (event_page_addr) { + memory_region_add_subregion(get_system_memory(), event_page_addr, + &synic->event_page_mr); + } + synic->event_page_addr = event_page_addr; + } +} + +void hyperv_synic_update(CPUState *cs, bool enable, + hwaddr msg_page_addr, hwaddr event_page_addr) +{ + SynICState *synic = get_synic(cs); + + if (!synic) { + return; + } + + synic_update(synic, enable, msg_page_addr, event_page_addr); +} + +static void synic_realize(DeviceState *dev, Error **errp) +{ + Object *obj = OBJECT(dev); + SynICState *synic = SYNIC(dev); + char *msgp_name, *eventp_name; + uint32_t vp_index; + + /* memory region names have to be globally unique */ + vp_index = hyperv_vp_index(synic->cs); + msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index); + eventp_name = g_strdup_printf("synic-%u-event-page", vp_index); + + memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name, + sizeof(*synic->msg_page), &error_abort); + memory_region_init_ram(&synic->event_page_mr, obj, eventp_name, + sizeof(*synic->event_page), &error_abort); + synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr); + synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr); + + g_free(msgp_name); + g_free(eventp_name); +} +static void synic_reset(DeviceState *dev) +{ + SynICState *synic = SYNIC(dev); + memset(synic->msg_page, 0, sizeof(*synic->msg_page)); + memset(synic->event_page, 0, sizeof(*synic->event_page)); + synic_update(synic, false, 0, 0); +} + +static void synic_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = synic_realize; + dc->reset = synic_reset; + dc->user_creatable = false; +} + +void hyperv_synic_add(CPUState *cs) +{ + Object *obj; + SynICState *synic; + + obj = object_new(TYPE_SYNIC); + synic = SYNIC(obj); + synic->cs = cs; + object_property_add_child(OBJECT(cs), "synic", obj, &error_abort); + object_unref(obj); + object_property_set_bool(obj, true, "realized", &error_abort); +} + +void hyperv_synic_reset(CPUState *cs) +{ + device_reset(DEVICE(get_synic(cs))); +} + +static const TypeInfo synic_type_info = { + .name = TYPE_SYNIC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SynICState), + .class_init = synic_class_init, +}; + +static void synic_register_types(void) +{ + type_register_static(&synic_type_info); +} + +type_init(synic_register_types) + +/* + * KVM has its own message producers (SynIC timers). To guarantee + * serialization with both KVM vcpu and the guest cpu, the messages are first + * staged in an intermediate area and then posted to the SynIC message page in + * the vcpu thread. + */ +typedef struct HvSintStagedMessage { + /* message content staged by hyperv_post_msg */ + struct hyperv_message msg; + /* callback + data (r/o) to complete the processing in a BH */ + HvSintMsgCb cb; + void *cb_data; + /* message posting status filled by cpu_post_msg */ + int status; + /* passing the buck: */ + enum { + /* initial state */ + HV_STAGED_MSG_FREE, + /* + * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE -> + * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu + */ + HV_STAGED_MSG_BUSY, + /* + * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot, + * notify the guest, records the status, marks the posting done (BUSY + * -> POSTED), and schedules sint_msg_bh BH + */ + HV_STAGED_MSG_POSTED, + /* + * sint_msg_bh (BH) verifies that the posting is done, runs the + * callback, and starts over (POSTED -> FREE) + */ + } state; +} HvSintStagedMessage; + +struct HvSintRoute { + uint32_t sint; + SynICState *synic; + int gsi; + EventNotifier sint_set_notifier; + EventNotifier sint_ack_notifier; + + HvSintStagedMessage *staged_msg; + + unsigned refcount; +}; + +static CPUState *hyperv_find_vcpu(uint32_t vp_index) +{ + CPUState *cs = qemu_get_cpu(vp_index); + assert(hyperv_vp_index(cs) == vp_index); + return cs; +} + +/* + * BH to complete the processing of a staged message. + */ +static void sint_msg_bh(void *opaque) +{ + HvSintRoute *sint_route = opaque; + HvSintStagedMessage *staged_msg = sint_route->staged_msg; + + if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) { + /* status nor ready yet (spurious ack from guest?), ignore */ + return; + } + + staged_msg->cb(staged_msg->cb_data, staged_msg->status); + staged_msg->status = 0; + + /* staged message processing finished, ready to start over */ + atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE); + /* drop the reference taken in hyperv_post_msg */ + hyperv_sint_route_unref(sint_route); +} + +/* + * Worker to transfer the message from the staging area into the SynIC message + * page in vcpu context. + */ +static void cpu_post_msg(CPUState *cs, run_on_cpu_data data) +{ + HvSintRoute *sint_route = data.host_ptr; + HvSintStagedMessage *staged_msg = sint_route->staged_msg; + SynICState *synic = sint_route->synic; + struct hyperv_message *dst_msg; + bool wait_for_sint_ack = false; + + assert(staged_msg->state == HV_STAGED_MSG_BUSY); + + if (!synic->enabled || !synic->msg_page_addr) { + staged_msg->status = -ENXIO; + goto posted; + } + + dst_msg = &synic->msg_page->slot[sint_route->sint]; + + if (dst_msg->header.message_type != HV_MESSAGE_NONE) { + dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING; + staged_msg->status = -EAGAIN; + wait_for_sint_ack = true; + } else { + memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg)); + staged_msg->status = hyperv_sint_route_set_sint(sint_route); + } + + memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page)); + +posted: + atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED); + /* + * Notify the msg originator of the progress made; if the slot was busy we + * set msg_pending flag in it so it will be the guest who will do EOM and + * trigger the notification from KVM via sint_ack_notifier + */ + if (!wait_for_sint_ack) { + aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, + sint_route); + } +} + +/* + * Post a Hyper-V message to the staging area, for delivery to guest in the + * vcpu thread. + */ +int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg) +{ + HvSintStagedMessage *staged_msg = sint_route->staged_msg; + + assert(staged_msg); + + /* grab the staging area */ + if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE, + HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) { + return -EAGAIN; + } + + memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg)); + + /* hold a reference on sint_route until the callback is finished */ + hyperv_sint_route_ref(sint_route); + + /* schedule message posting attempt in vcpu thread */ + async_run_on_cpu(sint_route->synic->cs, cpu_post_msg, + RUN_ON_CPU_HOST_PTR(sint_route)); + return 0; +} + +static void sint_ack_handler(EventNotifier *notifier) +{ + HvSintRoute *sint_route = container_of(notifier, HvSintRoute, + sint_ack_notifier); + event_notifier_test_and_clear(notifier); + + /* + * the guest consumed the previous message so complete the current one with + * -EAGAIN and let the msg originator retry + */ + aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route); +} + +/* + * Set given event flag for a given sint on a given vcpu, and signal the sint. + */ +int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno) +{ + int ret; + SynICState *synic = sint_route->synic; + unsigned long *flags, set_mask; + unsigned set_idx; + + if (eventno > HV_EVENT_FLAGS_COUNT) { + return -EINVAL; + } + if (!synic->enabled || !synic->event_page_addr) { + return -ENXIO; + } + + set_idx = BIT_WORD(eventno); + set_mask = BIT_MASK(eventno); + flags = synic->event_page->slot[sint_route->sint].flags; + + if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) { + memory_region_set_dirty(&synic->event_page_mr, 0, + sizeof(*synic->event_page)); + ret = hyperv_sint_route_set_sint(sint_route); + } else { + ret = 0; + } + return ret; +} + +HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, + HvSintMsgCb cb, void *cb_data) +{ + HvSintRoute *sint_route; + EventNotifier *ack_notifier; + int r, gsi; + CPUState *cs; + SynICState *synic; + + cs = hyperv_find_vcpu(vp_index); + if (!cs) { + return NULL; + } + + synic = get_synic(cs); + if (!synic) { + return NULL; + } + + sint_route = g_new0(HvSintRoute, 1); + r = event_notifier_init(&sint_route->sint_set_notifier, false); + if (r) { + goto err; + } + + + ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL; + if (ack_notifier) { + sint_route->staged_msg = g_new0(HvSintStagedMessage, 1); + sint_route->staged_msg->cb = cb; + sint_route->staged_msg->cb_data = cb_data; + + r = event_notifier_init(ack_notifier, false); + if (r) { + goto err_sint_set_notifier; + } + + event_notifier_set_handler(ack_notifier, sint_ack_handler); + } + + gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); + if (gsi < 0) { + goto err_gsi; + } + + r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, + &sint_route->sint_set_notifier, + ack_notifier, gsi); + if (r) { + goto err_irqfd; + } + sint_route->gsi = gsi; + sint_route->synic = synic; + sint_route->sint = sint; + sint_route->refcount = 1; + + return sint_route; + +err_irqfd: + kvm_irqchip_release_virq(kvm_state, gsi); +err_gsi: + if (ack_notifier) { + event_notifier_set_handler(ack_notifier, NULL); + event_notifier_cleanup(ack_notifier); + g_free(sint_route->staged_msg); + } +err_sint_set_notifier: + event_notifier_cleanup(&sint_route->sint_set_notifier); +err: + g_free(sint_route); + + return NULL; +} + +void hyperv_sint_route_ref(HvSintRoute *sint_route) +{ + sint_route->refcount++; +} + +void hyperv_sint_route_unref(HvSintRoute *sint_route) +{ + if (!sint_route) { + return; + } + + assert(sint_route->refcount > 0); + + if (--sint_route->refcount) { + return; + } + + kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, + &sint_route->sint_set_notifier, + sint_route->gsi); + kvm_irqchip_release_virq(kvm_state, sint_route->gsi); + if (sint_route->staged_msg) { + event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); + event_notifier_cleanup(&sint_route->sint_ack_notifier); + g_free(sint_route->staged_msg); + } + event_notifier_cleanup(&sint_route->sint_set_notifier); + g_free(sint_route); +} + +int hyperv_sint_route_set_sint(HvSintRoute *sint_route) +{ + return event_notifier_set(&sint_route->sint_set_notifier); +} + +typedef struct MsgHandler { + struct rcu_head rcu; + QLIST_ENTRY(MsgHandler) link; + uint32_t conn_id; + HvMsgHandler handler; + void *data; +} MsgHandler; + +typedef struct EventFlagHandler { + struct rcu_head rcu; + QLIST_ENTRY(EventFlagHandler) link; + uint32_t conn_id; + EventNotifier *notifier; +} EventFlagHandler; + +static QLIST_HEAD(, MsgHandler) msg_handlers; +static QLIST_HEAD(, EventFlagHandler) event_flag_handlers; +static QemuMutex handlers_mutex; + +static void __attribute__((constructor)) hv_init(void) +{ + QLIST_INIT(&msg_handlers); + QLIST_INIT(&event_flag_handlers); + qemu_mutex_init(&handlers_mutex); +} + +int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data) +{ + int ret; + MsgHandler *mh; + + qemu_mutex_lock(&handlers_mutex); + QLIST_FOREACH(mh, &msg_handlers, link) { + if (mh->conn_id == conn_id) { + if (handler) { + ret = -EEXIST; + } else { + QLIST_REMOVE_RCU(mh, link); + g_free_rcu(mh, rcu); + ret = 0; + } + goto unlock; + } + } + + if (handler) { + mh = g_new(MsgHandler, 1); + mh->conn_id = conn_id; + mh->handler = handler; + mh->data = data; + QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link); + ret = 0; + } else { + ret = -ENOENT; + } +unlock: + qemu_mutex_unlock(&handlers_mutex); + return ret; +} + +uint16_t hyperv_hcall_post_message(uint64_t param, bool fast) +{ + uint16_t ret; + hwaddr len; + struct hyperv_post_message_input *msg; + MsgHandler *mh; + + if (fast) { + return HV_STATUS_INVALID_HYPERCALL_CODE; + } + if (param & (__alignof__(*msg) - 1)) { + return HV_STATUS_INVALID_ALIGNMENT; + } + + len = sizeof(*msg); + msg = cpu_physical_memory_map(param, &len, 0); + if (len < sizeof(*msg)) { + ret = HV_STATUS_INSUFFICIENT_MEMORY; + goto unmap; + } + if (msg->payload_size > sizeof(msg->payload)) { + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; + goto unmap; + } + + ret = HV_STATUS_INVALID_CONNECTION_ID; + rcu_read_lock(); + QLIST_FOREACH_RCU(mh, &msg_handlers, link) { + if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) { + ret = mh->handler(msg, mh->data); + break; + } + } + rcu_read_unlock(); + +unmap: + cpu_physical_memory_unmap(msg, len, 0, 0); + return ret; +} + +static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) +{ + int ret; + EventFlagHandler *handler; + + qemu_mutex_lock(&handlers_mutex); + QLIST_FOREACH(handler, &event_flag_handlers, link) { + if (handler->conn_id == conn_id) { + if (notifier) { + ret = -EEXIST; + } else { + QLIST_REMOVE_RCU(handler, link); + g_free_rcu(handler, rcu); + ret = 0; + } + goto unlock; + } + } + + if (notifier) { + handler = g_new(EventFlagHandler, 1); + handler->conn_id = conn_id; + handler->notifier = notifier; + QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link); + ret = 0; + } else { + ret = -ENOENT; + } +unlock: + qemu_mutex_unlock(&handlers_mutex); + return ret; +} + +static bool process_event_flags_userspace; + +int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) +{ + if (!process_event_flags_userspace && + !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) { + process_event_flags_userspace = true; + + warn_report("Hyper-V event signaling is not supported by this kernel; " + "using slower userspace hypercall processing"); + } + + if (!process_event_flags_userspace) { + struct kvm_hyperv_eventfd hvevfd = { + .conn_id = conn_id, + .fd = notifier ? event_notifier_get_fd(notifier) : -1, + .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN, + }; + + return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd); + } + return set_event_flag_handler(conn_id, notifier); +} + +uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast) +{ + uint16_t ret; + EventFlagHandler *handler; + + if (unlikely(!fast)) { + hwaddr addr = param; + + if (addr & (__alignof__(addr) - 1)) { + return HV_STATUS_INVALID_ALIGNMENT; + } + + param = ldq_phys(&address_space_memory, addr); + } + + /* + * Per spec, bits 32-47 contain the extra "flag number". However, we + * have no use for it, and in all known usecases it is zero, so just + * report lookup failure if it isn't. + */ + if (param & 0xffff00000000ULL) { + return HV_STATUS_INVALID_PORT_ID; + } + /* remaining bits are reserved-zero */ + if (param & ~HV_CONNECTION_ID_MASK) { + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } + + ret = HV_STATUS_INVALID_CONNECTION_ID; + rcu_read_lock(); + QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) { + if (handler->conn_id == param) { + event_notifier_set(handler->notifier); + ret = 0; + break; + } + } + rcu_read_unlock(); + return ret; +} diff --git a/hw/hyperv/hyperv_testdev.c b/hw/hyperv/hyperv_testdev.c new file mode 100644 index 0000000000..4880333cf5 --- /dev/null +++ b/hw/hyperv/hyperv_testdev.c @@ -0,0 +1,327 @@ +/* + * QEMU KVM Hyper-V test device to support Hyper-V kvm-unit-tests + * + * Copyright (C) 2015 Andrey Smetanin + * + * Authors: + * Andrey Smetanin + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "qemu/queue.h" +#include "hw/qdev.h" +#include "hw/isa/isa.h" +#include "hw/hyperv/hyperv.h" + +typedef struct TestSintRoute { + QLIST_ENTRY(TestSintRoute) le; + uint8_t vp_index; + uint8_t sint; + HvSintRoute *sint_route; +} TestSintRoute; + +typedef struct TestMsgConn { + QLIST_ENTRY(TestMsgConn) le; + uint8_t conn_id; + HvSintRoute *sint_route; + struct hyperv_message msg; +} TestMsgConn; + +typedef struct TestEvtConn { + QLIST_ENTRY(TestEvtConn) le; + uint8_t conn_id; + HvSintRoute *sint_route; + EventNotifier notifier; +} TestEvtConn; + +struct HypervTestDev { + ISADevice parent_obj; + MemoryRegion sint_control; + QLIST_HEAD(, TestSintRoute) sint_routes; + QLIST_HEAD(, TestMsgConn) msg_conns; + QLIST_HEAD(, TestEvtConn) evt_conns; +}; +typedef struct HypervTestDev HypervTestDev; + +#define TYPE_HYPERV_TEST_DEV "hyperv-testdev" +#define HYPERV_TEST_DEV(obj) \ + OBJECT_CHECK(HypervTestDev, (obj), TYPE_HYPERV_TEST_DEV) + +enum { + HV_TEST_DEV_SINT_ROUTE_CREATE = 1, + HV_TEST_DEV_SINT_ROUTE_DESTROY, + HV_TEST_DEV_SINT_ROUTE_SET_SINT, + HV_TEST_DEV_MSG_CONN_CREATE, + HV_TEST_DEV_MSG_CONN_DESTROY, + HV_TEST_DEV_EVT_CONN_CREATE, + HV_TEST_DEV_EVT_CONN_DESTROY, +}; + +static void sint_route_create(HypervTestDev *dev, + uint8_t vp_index, uint8_t sint) +{ + TestSintRoute *sint_route; + + sint_route = g_new0(TestSintRoute, 1); + assert(sint_route); + + sint_route->vp_index = vp_index; + sint_route->sint = sint; + + sint_route->sint_route = hyperv_sint_route_new(vp_index, sint, NULL, NULL); + assert(sint_route->sint_route); + + QLIST_INSERT_HEAD(&dev->sint_routes, sint_route, le); +} + +static TestSintRoute *sint_route_find(HypervTestDev *dev, + uint8_t vp_index, uint8_t sint) +{ + TestSintRoute *sint_route; + + QLIST_FOREACH(sint_route, &dev->sint_routes, le) { + if (sint_route->vp_index == vp_index && sint_route->sint == sint) { + return sint_route; + } + } + assert(false); + return NULL; +} + +static void sint_route_destroy(HypervTestDev *dev, + uint8_t vp_index, uint8_t sint) +{ + TestSintRoute *sint_route; + + sint_route = sint_route_find(dev, vp_index, sint); + QLIST_REMOVE(sint_route, le); + hyperv_sint_route_unref(sint_route->sint_route); + g_free(sint_route); +} + +static void sint_route_set_sint(HypervTestDev *dev, + uint8_t vp_index, uint8_t sint) +{ + TestSintRoute *sint_route; + + sint_route = sint_route_find(dev, vp_index, sint); + + hyperv_sint_route_set_sint(sint_route->sint_route); +} + +static void msg_retry(void *opaque) +{ + TestMsgConn *conn = opaque; + assert(!hyperv_post_msg(conn->sint_route, &conn->msg)); +} + +static void msg_cb(void *data, int status) +{ + TestMsgConn *conn = data; + + if (!status) { + return; + } + + assert(status == -EAGAIN); + + aio_bh_schedule_oneshot(qemu_get_aio_context(), msg_retry, conn); +} + +static uint16_t msg_handler(const struct hyperv_post_message_input *msg, + void *data) +{ + int ret; + TestMsgConn *conn = data; + + /* post the same message we've got */ + conn->msg.header.message_type = msg->message_type; + assert(msg->payload_size < sizeof(conn->msg.payload)); + conn->msg.header.payload_size = msg->payload_size; + memcpy(&conn->msg.payload, msg->payload, msg->payload_size); + + ret = hyperv_post_msg(conn->sint_route, &conn->msg); + + switch (ret) { + case 0: + return HV_STATUS_SUCCESS; + case -EAGAIN: + return HV_STATUS_INSUFFICIENT_BUFFERS; + default: + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } +} + +static void msg_conn_create(HypervTestDev *dev, uint8_t vp_index, + uint8_t sint, uint8_t conn_id) +{ + TestMsgConn *conn; + + conn = g_new0(TestMsgConn, 1); + assert(conn); + + conn->conn_id = conn_id; + + conn->sint_route = hyperv_sint_route_new(vp_index, sint, msg_cb, conn); + assert(conn->sint_route); + + assert(!hyperv_set_msg_handler(conn->conn_id, msg_handler, conn)); + + QLIST_INSERT_HEAD(&dev->msg_conns, conn, le); +} + +static void msg_conn_destroy(HypervTestDev *dev, uint8_t conn_id) +{ + TestMsgConn *conn; + + QLIST_FOREACH(conn, &dev->msg_conns, le) { + if (conn->conn_id == conn_id) { + QLIST_REMOVE(conn, le); + hyperv_set_msg_handler(conn->conn_id, NULL, NULL); + hyperv_sint_route_unref(conn->sint_route); + g_free(conn); + return; + } + } + assert(false); +} + +static void evt_conn_handler(EventNotifier *notifier) +{ + TestEvtConn *conn = container_of(notifier, TestEvtConn, notifier); + + event_notifier_test_and_clear(notifier); + + /* signal the same event flag we've got */ + assert(!hyperv_set_event_flag(conn->sint_route, conn->conn_id)); +} + +static void evt_conn_create(HypervTestDev *dev, uint8_t vp_index, + uint8_t sint, uint8_t conn_id) +{ + TestEvtConn *conn; + + conn = g_new0(TestEvtConn, 1); + assert(conn); + + conn->conn_id = conn_id; + + conn->sint_route = hyperv_sint_route_new(vp_index, sint, NULL, NULL); + assert(conn->sint_route); + + assert(!event_notifier_init(&conn->notifier, false)); + + event_notifier_set_handler(&conn->notifier, evt_conn_handler); + + assert(!hyperv_set_event_flag_handler(conn_id, &conn->notifier)); + + QLIST_INSERT_HEAD(&dev->evt_conns, conn, le); +} + +static void evt_conn_destroy(HypervTestDev *dev, uint8_t conn_id) +{ + TestEvtConn *conn; + + QLIST_FOREACH(conn, &dev->evt_conns, le) { + if (conn->conn_id == conn_id) { + QLIST_REMOVE(conn, le); + hyperv_set_event_flag_handler(conn->conn_id, NULL); + event_notifier_set_handler(&conn->notifier, NULL); + event_notifier_cleanup(&conn->notifier); + hyperv_sint_route_unref(conn->sint_route); + g_free(conn); + return; + } + } + assert(false); +} + +static uint64_t hv_test_dev_read(void *opaque, hwaddr addr, unsigned size) +{ + return 0; +} + +static void hv_test_dev_write(void *opaque, hwaddr addr, uint64_t data, + uint32_t len) +{ + HypervTestDev *dev = HYPERV_TEST_DEV(opaque); + uint8_t sint = data & 0xFF; + uint8_t vp_index = (data >> 8ULL) & 0xFF; + uint8_t ctl = (data >> 16ULL) & 0xFF; + uint8_t conn_id = (data >> 24ULL) & 0xFF; + + switch (ctl) { + case HV_TEST_DEV_SINT_ROUTE_CREATE: + sint_route_create(dev, vp_index, sint); + break; + case HV_TEST_DEV_SINT_ROUTE_DESTROY: + sint_route_destroy(dev, vp_index, sint); + break; + case HV_TEST_DEV_SINT_ROUTE_SET_SINT: + sint_route_set_sint(dev, vp_index, sint); + break; + case HV_TEST_DEV_MSG_CONN_CREATE: + msg_conn_create(dev, vp_index, sint, conn_id); + break; + case HV_TEST_DEV_MSG_CONN_DESTROY: + msg_conn_destroy(dev, conn_id); + break; + case HV_TEST_DEV_EVT_CONN_CREATE: + evt_conn_create(dev, vp_index, sint, conn_id); + break; + case HV_TEST_DEV_EVT_CONN_DESTROY: + evt_conn_destroy(dev, conn_id); + break; + default: + break; + } +} + +static const MemoryRegionOps synic_test_sint_ops = { + .read = hv_test_dev_read, + .write = hv_test_dev_write, + .valid.min_access_size = 4, + .valid.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void hv_test_dev_realizefn(DeviceState *d, Error **errp) +{ + ISADevice *isa = ISA_DEVICE(d); + HypervTestDev *dev = HYPERV_TEST_DEV(d); + MemoryRegion *io = isa_address_space_io(isa); + + QLIST_INIT(&dev->sint_routes); + QLIST_INIT(&dev->msg_conns); + QLIST_INIT(&dev->evt_conns); + memory_region_init_io(&dev->sint_control, OBJECT(dev), + &synic_test_sint_ops, dev, + "hyperv-testdev-ctl", 4); + memory_region_add_subregion(io, 0x3000, &dev->sint_control); +} + +static void hv_test_dev_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->realize = hv_test_dev_realizefn; +} + +static const TypeInfo hv_test_dev_info = { + .name = TYPE_HYPERV_TEST_DEV, + .parent = TYPE_ISA_DEVICE, + .instance_size = sizeof(HypervTestDev), + .class_init = hv_test_dev_class_init, +}; + +static void hv_test_dev_register_types(void) +{ + type_register_static(&hv_test_dev_info); +} +type_init(hv_test_dev_register_types); diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index 6d50b03cfd..680350b3c3 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -71,7 +71,6 @@ obj-$(CONFIG_IOTKIT_SYSCTL) += iotkit-sysctl.o obj-$(CONFIG_IOTKIT_SYSINFO) += iotkit-sysinfo.o obj-$(CONFIG_PVPANIC) += pvpanic.o -obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o obj-$(CONFIG_AUX) += auxbus.o obj-$(CONFIG_ASPEED_SOC) += aspeed_scu.o aspeed_sdmc.o obj-$(CONFIG_MSF2) += msf2-sysreg.o diff --git a/hw/misc/edu.c b/hw/misc/edu.c index 0687ffd343..cdcf550dd7 100644 --- a/hw/misc/edu.c +++ b/hw/misc/edu.c @@ -342,7 +342,7 @@ static void *edu_fact_thread(void *opaque) static void pci_edu_realize(PCIDevice *pdev, Error **errp) { - EduState *edu = DO_UPCAST(EduState, pdev, pdev); + EduState *edu = EDU(pdev); uint8_t *pci_conf = pdev->config; pci_config_set_interrupt_pin(pci_conf, 1); @@ -365,7 +365,7 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp) static void pci_edu_uninit(PCIDevice *pdev) { - EduState *edu = DO_UPCAST(EduState, pdev, pdev); + EduState *edu = EDU(pdev); qemu_mutex_lock(&edu->thr_mutex); edu->stopping = true; diff --git a/hw/misc/hyperv_testdev.c b/hw/misc/hyperv_testdev.c deleted file mode 100644 index 7549f470b1..0000000000 --- a/hw/misc/hyperv_testdev.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * QEMU KVM Hyper-V test device to support Hyper-V kvm-unit-tests - * - * Copyright (C) 2015 Andrey Smetanin - * - * Authors: - * Andrey Smetanin - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include -#include "hw/hw.h" -#include "hw/qdev.h" -#include "hw/isa/isa.h" -#include "sysemu/kvm.h" -#include "target/i386/hyperv.h" -#include "kvm_i386.h" - -#define HV_TEST_DEV_MAX_SINT_ROUTES 64 - -struct HypervTestDev { - ISADevice parent_obj; - MemoryRegion sint_control; - HvSintRoute *sint_route[HV_TEST_DEV_MAX_SINT_ROUTES]; -}; -typedef struct HypervTestDev HypervTestDev; - -#define TYPE_HYPERV_TEST_DEV "hyperv-testdev" -#define HYPERV_TEST_DEV(obj) \ - OBJECT_CHECK(HypervTestDev, (obj), TYPE_HYPERV_TEST_DEV) - -enum { - HV_TEST_DEV_SINT_ROUTE_CREATE = 1, - HV_TEST_DEV_SINT_ROUTE_DESTROY, - HV_TEST_DEV_SINT_ROUTE_SET_SINT -}; - -static int alloc_sint_route_index(HypervTestDev *dev) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(dev->sint_route); i++) { - if (dev->sint_route[i] == NULL) { - return i; - } - } - return -1; -} - -static void free_sint_route_index(HypervTestDev *dev, int i) -{ - assert(i >= 0 && i < ARRAY_SIZE(dev->sint_route)); - dev->sint_route[i] = NULL; -} - -static int find_sint_route_index(HypervTestDev *dev, uint32_t vp_index, - uint32_t sint) -{ - HvSintRoute *sint_route; - int i; - - for (i = 0; i < ARRAY_SIZE(dev->sint_route); i++) { - sint_route = dev->sint_route[i]; - if (sint_route && sint_route->vp_index == vp_index && - sint_route->sint == sint) { - return i; - } - } - return -1; -} - -static void hv_synic_test_dev_control(HypervTestDev *dev, uint32_t ctl, - uint32_t vp_index, uint32_t sint) -{ - int i; - HvSintRoute *sint_route; - - switch (ctl) { - case HV_TEST_DEV_SINT_ROUTE_CREATE: - i = alloc_sint_route_index(dev); - assert(i >= 0); - sint_route = kvm_hv_sint_route_create(vp_index, sint, NULL); - assert(sint_route); - dev->sint_route[i] = sint_route; - break; - case HV_TEST_DEV_SINT_ROUTE_DESTROY: - i = find_sint_route_index(dev, vp_index, sint); - assert(i >= 0); - sint_route = dev->sint_route[i]; - kvm_hv_sint_route_destroy(sint_route); - free_sint_route_index(dev, i); - break; - case HV_TEST_DEV_SINT_ROUTE_SET_SINT: - i = find_sint_route_index(dev, vp_index, sint); - assert(i >= 0); - sint_route = dev->sint_route[i]; - kvm_hv_sint_route_set_sint(sint_route); - break; - default: - break; - } -} - -static uint64_t hv_test_dev_read(void *opaque, hwaddr addr, unsigned size) -{ - return 0; -} - -static void hv_test_dev_write(void *opaque, hwaddr addr, uint64_t data, - uint32_t len) -{ - HypervTestDev *dev = HYPERV_TEST_DEV(opaque); - uint8_t ctl; - - ctl = (data >> 16ULL) & 0xFF; - switch (ctl) { - case HV_TEST_DEV_SINT_ROUTE_CREATE: - case HV_TEST_DEV_SINT_ROUTE_DESTROY: - case HV_TEST_DEV_SINT_ROUTE_SET_SINT: { - uint8_t sint = data & 0xFF; - uint8_t vp_index = (data >> 8ULL) & 0xFF; - hv_synic_test_dev_control(dev, ctl, vp_index, sint); - break; - } - default: - break; - } -} - -static const MemoryRegionOps synic_test_sint_ops = { - .read = hv_test_dev_read, - .write = hv_test_dev_write, - .valid.min_access_size = 4, - .valid.max_access_size = 4, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static void hv_test_dev_realizefn(DeviceState *d, Error **errp) -{ - ISADevice *isa = ISA_DEVICE(d); - HypervTestDev *dev = HYPERV_TEST_DEV(d); - MemoryRegion *io = isa_address_space_io(isa); - - memset(dev->sint_route, 0, sizeof(dev->sint_route)); - memory_region_init_io(&dev->sint_control, OBJECT(dev), - &synic_test_sint_ops, dev, - "hyperv-testdev-ctl", 4); - memory_region_add_subregion(io, 0x3000, &dev->sint_control); -} - -static void hv_test_dev_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - set_bit(DEVICE_CATEGORY_MISC, dc->categories); - dc->realize = hv_test_dev_realizefn; -} - -static const TypeInfo hv_test_dev_info = { - .name = TYPE_HYPERV_TEST_DEV, - .parent = TYPE_ISA_DEVICE, - .instance_size = sizeof(HypervTestDev), - .class_init = hv_test_dev_class_init, -}; - -static void hv_test_dev_register_types(void) -{ - type_register_static(&hv_test_dev_info); -} -type_init(hv_test_dev_register_types); diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 0e608347c1..da73743fa2 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -327,6 +327,10 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) sysbus_add_io(sbd, 0xcfc, &s->data_mem); sysbus_init_ioports(sbd, 0xcfc, 4); + + /* register i440fx 0xcf8 port as coalesced pio */ + memory_region_set_flush_coalesced(&s->data_mem); + memory_region_add_coalescing(&s->conf_mem, 0, 4); } static void i440fx_realize(PCIDevice *dev, Error **errp) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 02f9576588..8ce1e09932 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -51,6 +51,10 @@ static void q35_host_realize(DeviceState *dev, Error **errp) sysbus_add_io(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, &pci->data_mem); sysbus_init_ioports(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, 4); + /* register q35 0xcf8 port as coalesced pio */ + memory_region_set_flush_coalesced(&pci->data_mem); + memory_region_add_coalescing(&pci->conf_mem, 0, 4); + pci->bus = pci_root_bus_new(DEVICE(s), "pcie.0", s->mch.pci_address_space, s->mch.address_space_io, diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index c43163cef4..e2c5408aa2 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -441,9 +441,18 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) } switch (error) { case 0: - /* The command has run, no need to fake sense. */ + /* A passthrough command has run and has produced sense data; check + * whether the error has to be handled by the guest or should rather + * pause the host. + */ assert(r->status && *r->status); - scsi_req_complete(&r->req, *r->status); + error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); + if (error == ECANCELED || error == EAGAIN || error == ENOTCONN || + error == 0) { + /* These errors are handled by guest. */ + scsi_req_complete(&r->req, *r->status); + return true; + } break; case ENOMEDIUM: scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); @@ -462,23 +471,17 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) break; } } - if (!error) { - assert(r->status && *r->status); - error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); - - if (error == ECANCELED || error == EAGAIN || error == ENOTCONN || - error == 0) { - /* These errors are handled by guest. */ - scsi_req_complete(&r->req, *r->status); - return true; - } - } blk_error_action(s->qdev.conf.blk, action, is_read, error); + if (action == BLOCK_ERROR_ACTION_IGNORE) { + scsi_req_complete(&r->req, 0); + return true; + } + if (action == BLOCK_ERROR_ACTION_STOP) { scsi_req_retry(&r->req); } - return action != BLOCK_ERROR_ACTION_IGNORE; + return false; } static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 5a3057d1f8..3aa99717e2 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -797,16 +797,8 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, virtio_scsi_acquire(s); blk_set_aio_context(sd->conf.blk, s->ctx); virtio_scsi_release(s); - } -} -/* Announce the new device after it has been plugged */ -static void virtio_scsi_post_hotplug(HotplugHandler *hotplug_dev, - DeviceState *dev) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); - VirtIOSCSI *s = VIRTIO_SCSI(vdev); - SCSIDevice *sd = SCSI_DEVICE(dev); + } if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { virtio_scsi_acquire(s); @@ -976,7 +968,6 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data) vdc->start_ioeventfd = virtio_scsi_dataplane_start; vdc->stop_ioeventfd = virtio_scsi_dataplane_stop; hc->plug = virtio_scsi_hotplug; - hc->post_plug = virtio_scsi_post_hotplug; hc->unplug = virtio_scsi_hotunplug; } diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index acee47da0e..e4e4de8b8a 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -34,6 +34,7 @@ #include "qapi/qapi-commands-misc.h" #include "qapi/qapi-events-misc.h" #include "qapi/visitor.h" +#include "exec/address-spaces.h" #ifdef TARGET_I386 #include "hw/i386/apic.h" @@ -70,6 +71,7 @@ typedef struct RTCState { ISADevice parent_obj; MemoryRegion io; + MemoryRegion coalesced_io; uint8_t cmos_data[128]; uint8_t cmos_index; int32_t base_year; @@ -990,6 +992,13 @@ static void rtc_realizefn(DeviceState *dev, Error **errp) memory_region_init_io(&s->io, OBJECT(s), &cmos_ops, s, "rtc", 2); isa_register_ioport(isadev, &s->io, base); + /* register rtc 0x70 port for coalesced_pio */ + memory_region_set_flush_coalesced(&s->io); + memory_region_init_io(&s->coalesced_io, OBJECT(s), &cmos_ops, + s, "rtc-index", 1); + memory_region_add_subregion(&s->io, 0, &s->coalesced_io); + memory_region_add_coalescing(&s->coalesced_io, 0, 1); + qdev_set_legacy_instance_id(dev, base, 3); qemu_register_reset(rtc_reset, s); diff --git a/include/block/aio.h b/include/block/aio.h index f08630c6e5..0ca25dfec6 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -387,6 +387,32 @@ struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp); /* Return the LinuxAioState bound to this AioContext */ struct LinuxAioState *aio_get_linux_aio(AioContext *ctx); +/** + * aio_timer_new_with_attrs: + * @ctx: the aio context + * @type: the clock type + * @scale: the scale + * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_ values + * to assign + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Allocate a new timer (with attributes) attached to the context @ctx. + * The function is responsible for memory allocation. + * + * The preferred interface is aio_timer_init or aio_timer_init_with_attrs. + * Use that unless you really need dynamic memory allocation. + * + * Returns: a pointer to the new timer + */ +static inline QEMUTimer *aio_timer_new_with_attrs(AioContext *ctx, + QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) +{ + return timer_new_full(&ctx->tlg, type, scale, attributes, cb, opaque); +} + /** * aio_timer_new: * @ctx: the aio context @@ -396,10 +422,7 @@ struct LinuxAioState *aio_get_linux_aio(AioContext *ctx); * @opaque: the opaque pointer to pass to the callback * * Allocate a new timer attached to the context @ctx. - * The function is responsible for memory allocation. - * - * The preferred interface is aio_timer_init. Use that - * unless you really need dynamic memory allocation. + * See aio_timer_new_with_attrs for details. * * Returns: a pointer to the new timer */ @@ -407,7 +430,29 @@ static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque); + return timer_new_full(&ctx->tlg, type, scale, 0, cb, opaque); +} + +/** + * aio_timer_init_with_attrs: + * @ctx: the aio context + * @ts: the timer + * @type: the clock type + * @scale: the scale + * @attributes: 0, or one to multiple OR'ed QEMU_TIMER_ATTR_ values + * to assign + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Initialise a new timer (with attributes) attached to the context @ctx. + * The caller is responsible for memory allocation. + */ +static inline void aio_timer_init_with_attrs(AioContext *ctx, + QEMUTimer *ts, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) +{ + timer_init_full(ts, &ctx->tlg, type, scale, attributes, cb, opaque); } /** @@ -420,14 +465,14 @@ static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, * @opaque: the opaque pointer to pass to the callback * * Initialise a new timer attached to the context @ctx. - * The caller is responsible for memory allocation. + * See aio_timer_init_with_attrs for details. */ static inline void aio_timer_init(AioContext *ctx, QEMUTimer *ts, QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque); + timer_init_full(ts, &ctx->tlg, type, scale, 0, cb, opaque); } /** diff --git a/include/exec/memory.h b/include/exec/memory.h index 3a427aacf1..667466b8f3 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -419,9 +419,9 @@ struct MemoryListener { bool match_data, uint64_t data, EventNotifier *e); void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section, bool match_data, uint64_t data, EventNotifier *e); - void (*coalesced_mmio_add)(MemoryListener *listener, MemoryRegionSection *section, + void (*coalesced_io_add)(MemoryListener *listener, MemoryRegionSection *section, hwaddr addr, hwaddr len); - void (*coalesced_mmio_del)(MemoryListener *listener, MemoryRegionSection *section, + void (*coalesced_io_del)(MemoryListener *listener, MemoryRegionSection *section, hwaddr addr, hwaddr len); /* Lower = earlier (during add), later (during del) */ unsigned priority; diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h index 51541d63e1..1a0516a479 100644 --- a/include/hw/hotplug.h +++ b/include/hw/hotplug.h @@ -47,8 +47,6 @@ typedef void (*hotplug_fn)(HotplugHandler *plug_handler, * @parent: Opaque parent interface. * @pre_plug: pre plug callback called at start of device.realize(true) * @plug: plug callback called at end of device.realize(true). - * @post_plug: post plug callback called after device.realize(true) and device - * reset * @unplug_request: unplug request callback. * Used as a means to initiate device unplug for devices that * require asynchronous unplug handling. @@ -63,7 +61,6 @@ typedef struct HotplugHandlerClass { /* */ hotplug_fn pre_plug; hotplug_fn plug; - void (*post_plug)(HotplugHandler *plug_handler, DeviceState *plugged_dev); hotplug_fn unplug_request; hotplug_fn unplug; } HotplugHandlerClass; @@ -86,14 +83,6 @@ void hotplug_handler_pre_plug(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp); -/** - * hotplug_handler_post_plug: - * - * Call #HotplugHandlerClass.post_plug callback of @plug_handler. - */ -void hotplug_handler_post_plug(HotplugHandler *plug_handler, - DeviceState *plugged_dev); - /** * hotplug_handler_unplug_request: * diff --git a/include/hw/hyperv/hyperv-proto.h b/include/hw/hyperv/hyperv-proto.h new file mode 100644 index 0000000000..21dc28aee9 --- /dev/null +++ b/include/hw/hyperv/hyperv-proto.h @@ -0,0 +1,130 @@ +/* + * Definitions for Hyper-V guest/hypervisor interaction + * + * Copyright (c) 2017-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_HYPERV_PROTO_H +#define HW_HYPERV_HYPERV_PROTO_H + +#include "qemu/bitmap.h" + +/* + * Hypercall status code + */ +#define HV_STATUS_SUCCESS 0 +#define HV_STATUS_INVALID_HYPERCALL_CODE 2 +#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 +#define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INVALID_PARAMETER 5 +#define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_PORT_ID 17 +#define HV_STATUS_INVALID_CONNECTION_ID 18 +#define HV_STATUS_INSUFFICIENT_BUFFERS 19 + +/* + * Hypercall numbers + */ +#define HV_POST_MESSAGE 0x005c +#define HV_SIGNAL_EVENT 0x005d +#define HV_HYPERCALL_FAST (1u << 16) + +/* + * Message size + */ +#define HV_MESSAGE_PAYLOAD_SIZE 240 + +/* + * Message types + */ +#define HV_MESSAGE_NONE 0x00000000 +#define HV_MESSAGE_VMBUS 0x00000001 +#define HV_MESSAGE_UNMAPPED_GPA 0x80000000 +#define HV_MESSAGE_GPA_INTERCEPT 0x80000001 +#define HV_MESSAGE_TIMER_EXPIRED 0x80000010 +#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE 0x80000020 +#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION 0x80000021 +#define HV_MESSAGE_UNSUPPORTED_FEATURE 0x80000022 +#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE 0x80000040 +#define HV_MESSAGE_X64_IOPORT_INTERCEPT 0x80010000 +#define HV_MESSAGE_X64_MSR_INTERCEPT 0x80010001 +#define HV_MESSAGE_X64_CPUID_INTERCEPT 0x80010002 +#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT 0x80010003 +#define HV_MESSAGE_X64_APIC_EOI 0x80010004 +#define HV_MESSAGE_X64_LEGACY_FP_ERROR 0x80010005 + +/* + * Message flags + */ +#define HV_MESSAGE_FLAG_PENDING 0x1 + +/* + * Number of synthetic interrupts + */ +#define HV_SINT_COUNT 16 + +/* + * Event flags number per SINT + */ +#define HV_EVENT_FLAGS_COUNT (256 * 8) + +/* + * Connection id valid bits + */ +#define HV_CONNECTION_ID_MASK 0x00ffffff + +/* + * Input structure for POST_MESSAGE hypercall + */ +struct hyperv_post_message_input { + uint32_t connection_id; + uint32_t _reserved; + uint32_t message_type; + uint32_t payload_size; + uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; +}; + +/* + * Input structure for SIGNAL_EVENT hypercall + */ +struct hyperv_signal_event_input { + uint32_t connection_id; + uint16_t flag_number; + uint16_t _reserved_zero; +}; + +/* + * SynIC message structures + */ +struct hyperv_message_header { + uint32_t message_type; + uint8_t payload_size; + uint8_t message_flags; /* HV_MESSAGE_FLAG_XX */ + uint8_t _reserved[2]; + uint64_t sender; +}; + +struct hyperv_message { + struct hyperv_message_header header; + uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; +}; + +struct hyperv_message_page { + struct hyperv_message slot[HV_SINT_COUNT]; +}; + +/* + * SynIC event flags structures + */ +struct hyperv_event_flags { + DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT); +}; + +struct hyperv_event_flags_page { + struct hyperv_event_flags slot[HV_SINT_COUNT]; +}; + +#endif diff --git a/include/hw/hyperv/hyperv.h b/include/hw/hyperv/hyperv.h new file mode 100644 index 0000000000..597381cb01 --- /dev/null +++ b/include/hw/hyperv/hyperv.h @@ -0,0 +1,83 @@ +/* + * Hyper-V guest/hypervisor interaction + * + * Copyright (c) 2015-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HW_HYPERV_HYPERV_H +#define HW_HYPERV_HYPERV_H + +#include "cpu-qom.h" +#include "hw/hyperv/hyperv-proto.h" + +typedef struct HvSintRoute HvSintRoute; + +/* + * Callback executed in a bottom-half when the status of posting the message + * becomes known, before unblocking the connection for further messages + */ +typedef void (*HvSintMsgCb)(void *data, int status); + +HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, + HvSintMsgCb cb, void *cb_data); +void hyperv_sint_route_ref(HvSintRoute *sint_route); +void hyperv_sint_route_unref(HvSintRoute *sint_route); + +int hyperv_sint_route_set_sint(HvSintRoute *sint_route); + +/* + * Submit a message to be posted in vcpu context. If the submission succeeds, + * the status of posting the message is reported via the callback associated + * with the @sint_route; until then no more messages are accepted. + */ +int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *msg); +/* + * Set event flag @eventno, and signal the SINT if the flag has changed. + */ +int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno); + +/* + * Handler for messages arriving from the guest via HV_POST_MESSAGE hypercall. + * Executed in vcpu context. + */ +typedef uint16_t (*HvMsgHandler)(const struct hyperv_post_message_input *msg, + void *data); +/* + * Associate @handler with the message connection @conn_id, such that @handler + * is called with @data when the guest executes HV_POST_MESSAGE hypercall on + * @conn_id. If @handler is NULL clear the association. + */ +int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data); +/* + * Associate @notifier with the event connection @conn_id, such that @notifier + * is signaled when the guest executes HV_SIGNAL_EVENT hypercall on @conn_id. + * If @notifier is NULL clear the association. + */ +int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier); + +/* + * Process HV_POST_MESSAGE hypercall: parse the data in the guest memory as + * specified in @param, and call the HvMsgHandler associated with the + * connection on the message contained therein. + */ +uint16_t hyperv_hcall_post_message(uint64_t param, bool fast); +/* + * Process HV_SIGNAL_EVENT hypercall: signal the EventNotifier associated with + * the connection as specified in @param. + */ +uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast); + +static inline uint32_t hyperv_vp_index(CPUState *cs) +{ + return cs->cpu_index; +} + +void hyperv_synic_add(CPUState *cs); +void hyperv_synic_reset(CPUState *cs); +void hyperv_synic_update(CPUState *cs, bool enable, + hwaddr msg_page_addr, hwaddr event_page_addr); + +#endif diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 6894f37df1..dfe6746692 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -294,6 +294,14 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +#define PC_COMPAT_3_0 \ + HW_COMPAT_3_0 \ + {\ + .driver = TYPE_X86_CPU,\ + .property = "x-hv-synic-kvm-only",\ + .value = "on",\ + } + #define PC_COMPAT_2_12 \ HW_COMPAT_2_12 \ {\ diff --git a/include/qemu/timer.h b/include/qemu/timer.h index dfecd03e28..a86330c987 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -2,6 +2,7 @@ #define QEMU_TIMER_H #include "qemu-common.h" +#include "qemu/bitops.h" #include "qemu/notify.h" #include "qemu/host-utils.h" @@ -42,14 +43,6 @@ * In icount mode, this clock counts nanoseconds while the virtual * machine is running. It is used to increase @QEMU_CLOCK_VIRTUAL * while the CPUs are sleeping and thus not executing instructions. - * - * @QEMU_CLOCK_VIRTUAL_EXT: virtual clock for external subsystems - * - * The virtual clock only runs during the emulation. It stops - * when the virtual machine is stopped. The timers for this clock - * do not recorded in rr mode, therefore this clock could be used - * for the subsystems that operate outside the guest core. - * */ typedef enum { @@ -57,10 +50,27 @@ typedef enum { QEMU_CLOCK_VIRTUAL = 1, QEMU_CLOCK_HOST = 2, QEMU_CLOCK_VIRTUAL_RT = 3, - QEMU_CLOCK_VIRTUAL_EXT = 4, QEMU_CLOCK_MAX } QEMUClockType; +/** + * QEMU Timer attributes: + * + * An individual timer may be given one or multiple attributes when initialized. + * Each attribute corresponds to one bit. Attributes modify the processing + * of timers when they fire. + * + * The following attributes are available: + * + * QEMU_TIMER_ATTR_EXTERNAL: drives external subsystem + * + * Timers with this attribute do not recorded in rr mode, therefore it could be + * used for the subsystems that operate outside the guest core. Applicable only + * with virtual clock type. + */ + +#define QEMU_TIMER_ATTR_EXTERNAL BIT(0) + typedef struct QEMUTimerList QEMUTimerList; struct QEMUTimerListGroup { @@ -76,6 +86,7 @@ struct QEMUTimer { QEMUTimerCB *cb; void *opaque; QEMUTimer *next; + int attributes; int scale; }; @@ -427,22 +438,27 @@ int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup *tlg); */ /** - * timer_init_tl: + * timer_init_full: * @ts: the timer to be initialised - * @timer_list: the timer list to attach the timer to + * @timer_list_group: (optional) the timer list group to attach the timer to + * @type: the clock type to use * @scale: the scale value for the timer + * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_ values * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * - * Initialise a new timer and associate it with @timer_list. + * Initialise a timer with the given scale and attributes, + * and associate it with timer list for given clock @type in @timer_list_group + * (or default timer list group, if NULL). * The caller is responsible for allocating the memory. * * You need not call an explicit deinit call. Simply make * sure it is not on a list with timer_del. */ -void timer_init_tl(QEMUTimer *ts, - QEMUTimerList *timer_list, int scale, - QEMUTimerCB *cb, void *opaque); +void timer_init_full(QEMUTimer *ts, + QEMUTimerListGroup *timer_list_group, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque); /** * timer_init: @@ -454,14 +470,12 @@ void timer_init_tl(QEMUTimer *ts, * * Initialize a timer with the given scale on the default timer list * associated with the clock. - * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init(QEMUTimer *ts, QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - timer_init_tl(ts, main_loop_tlg.tl[type], scale, cb, opaque); + timer_init_full(ts, NULL, type, scale, 0, cb, opaque); } /** @@ -473,9 +487,7 @@ static inline void timer_init(QEMUTimer *ts, QEMUClockType type, int scale, * * Initialize a timer with nanosecond scale on the default timer list * associated with the clock. - * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init_ns(QEMUTimer *ts, QEMUClockType type, QEMUTimerCB *cb, void *opaque) @@ -492,9 +504,7 @@ static inline void timer_init_ns(QEMUTimer *ts, QEMUClockType type, * * Initialize a timer with microsecond scale on the default timer list * associated with the clock. - * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init_us(QEMUTimer *ts, QEMUClockType type, QEMUTimerCB *cb, void *opaque) @@ -511,9 +521,7 @@ static inline void timer_init_us(QEMUTimer *ts, QEMUClockType type, * * Initialize a timer with millisecond scale on the default timer list * associated with the clock. - * - * You need not call an explicit deinit call. Simply make - * sure it is not on a list with timer_del. + * See timer_init_full for details. */ static inline void timer_init_ms(QEMUTimer *ts, QEMUClockType type, QEMUTimerCB *cb, void *opaque) @@ -522,27 +530,37 @@ static inline void timer_init_ms(QEMUTimer *ts, QEMUClockType type, } /** - * timer_new_tl: - * @timer_list: the timer list to attach the timer to + * timer_new_full: + * @timer_list_group: (optional) the timer list group to attach the timer to + * @type: the clock type to use * @scale: the scale value for the timer + * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_ values * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * - * Create a new timer and associate it with @timer_list. + * Create a new timer with the given scale and attributes, + * and associate it with timer list for given clock @type in @timer_list_group + * (or default timer list group, if NULL). * The memory is allocated by the function. * * This is not the preferred interface unless you know you - * are going to call timer_free. Use timer_init instead. + * are going to call timer_free. Use timer_init or timer_init_full instead. + * + * The default timer list has one special feature: in icount mode, + * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is + * not true of other timer lists, which are typically associated + * with an AioContext---each of them runs its timer callbacks in its own + * AioContext thread. * * Returns: a pointer to the timer */ -static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list, - int scale, - QEMUTimerCB *cb, - void *opaque) +static inline QEMUTimer *timer_new_full(QEMUTimerListGroup *timer_list_group, + QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) { QEMUTimer *ts = g_malloc0(sizeof(QEMUTimer)); - timer_init_tl(ts, timer_list, scale, cb, opaque); + timer_init_full(ts, timer_list_group, type, scale, attributes, cb, opaque); return ts; } @@ -553,21 +571,16 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list, * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * - * Create a new timer and associate it with the default - * timer list for the clock type @type. - * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. + * Create a new timer with the given scale, + * and associate it with the default timer list for the clock type @type. + * See timer_new_full for details. * * Returns: a pointer to the timer */ static inline QEMUTimer *timer_new(QEMUClockType type, int scale, QEMUTimerCB *cb, void *opaque) { - return timer_new_tl(main_loop_tlg.tl[type], scale, cb, opaque); + return timer_new_full(NULL, type, scale, 0, cb, opaque); } /** @@ -578,12 +591,7 @@ static inline QEMUTimer *timer_new(QEMUClockType type, int scale, * * Create a new timer with nanosecond scale on the default timer list * associated with the clock. - * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. + * See timer_new_full for details. * * Returns: a pointer to the newly created timer */ @@ -599,14 +607,9 @@ static inline QEMUTimer *timer_new_ns(QEMUClockType type, QEMUTimerCB *cb, * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. - * * Create a new timer with microsecond scale on the default timer list * associated with the clock. + * See timer_new_full for details. * * Returns: a pointer to the newly created timer */ @@ -622,14 +625,9 @@ static inline QEMUTimer *timer_new_us(QEMUClockType type, QEMUTimerCB *cb, * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback * - * The default timer list has one special feature: in icount mode, - * %QEMU_CLOCK_VIRTUAL timers are run in the vCPU thread. This is - * not true of other timer lists, which are typically associated - * with an AioContext---each of them runs its timer callbacks in its own - * AioContext thread. - * * Create a new timer with millisecond scale on the default timer list * associated with the clock. + * See timer_new_full for details. * * Returns: a pointer to the newly created timer */ diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h index 241118845c..aaa51d2c51 100644 --- a/include/sysemu/hvf.h +++ b/include/sysemu/hvf.h @@ -17,7 +17,7 @@ #include "exec/memory.h" #include "sysemu/accel.h" -extern int hvf_disabled; +extern bool hvf_allowed; #ifdef CONFIG_HVF #include #include @@ -26,7 +26,7 @@ extern int hvf_disabled; #include "hw/hw.h" uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, int reg); -#define hvf_enabled() !hvf_disabled +#define hvf_enabled() (hvf_allowed) #else #define hvf_enabled() 0 #define hvf_get_supported_cpuid(func, idx, reg) 0 diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index 7f7a594eca..3a7c58e423 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -100,14 +100,20 @@ bool replay_has_interrupt(void); /* Processing clocks and other time sources */ /*! Save the specified clock */ -int64_t replay_save_clock(ReplayClockKind kind, int64_t clock); +int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, + int64_t raw_icount); /*! Read the specified clock from the log or return cached data */ int64_t replay_read_clock(ReplayClockKind kind); /*! Saves or reads the clock depending on the current replay mode. */ #define REPLAY_CLOCK(clock, value) \ (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock)) \ : replay_mode == REPLAY_MODE_RECORD \ - ? replay_save_clock((clock), (value)) \ + ? replay_save_clock((clock), (value), cpu_get_icount_raw()) \ + : (value)) +#define REPLAY_CLOCK_LOCKED(clock, value) \ + (replay_mode == REPLAY_MODE_PLAY ? replay_read_clock((clock)) \ + : replay_mode == REPLAY_MODE_RECORD \ + ? replay_save_clock((clock), (value), cpu_get_icount_raw_locked()) \ : (value)) /* Events */ diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 1b32b56a03..8c876c166e 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) #define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) +#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index fd23d5778e..dabfcf7c39 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 pending; __u32 error_code; } exception; struct { @@ -322,7 +323,9 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u8 reserved[27]; + __u8 exception_has_payload; + __u64 exception_payload; }; /* for KVM_GET/SET_DEBUGREGS */ @@ -381,6 +384,7 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 +#define KVM_STATE_NESTED_EVMCS 0x00000004 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 83ba4eb571..f11a7eb49c 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -420,13 +420,19 @@ struct kvm_run { struct kvm_coalesced_mmio_zone { __u64 addr; __u32 size; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; }; struct kvm_coalesced_mmio { __u64 phys_addr; __u32 len; - __u32 pad; + union { + __u32 pad; + __u32 pio; + }; __u8 data[8]; }; @@ -719,6 +725,7 @@ struct kvm_ppc_one_seg_page_size { #define KVM_PPC_PAGE_SIZES_REAL 0x00000001 #define KVM_PPC_1T_SEGMENTS 0x00000002 +#define KVM_PPC_NO_HASH 0x00000004 struct kvm_ppc_smmu_info { __u64 flags; @@ -953,6 +960,11 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_NESTED_STATE 157 #define KVM_CAP_ARM_INJECT_SERROR_ESR 158 #define KVM_CAP_MSR_PLATFORM_INFO 159 +#define KVM_CAP_PPC_NESTED_HV 160 +#define KVM_CAP_HYPERV_SEND_IPI 161 +#define KVM_CAP_COALESCED_PIO 162 +#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 +#define KVM_CAP_EXCEPTION_PAYLOAD 164 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/memory.c b/memory.c index d852f1143d..51204aa079 100644 --- a/memory.c +++ b/memory.c @@ -2129,7 +2129,7 @@ static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpa .size = fr->addr.size, }; - MEMORY_LISTENER_CALL(as, coalesced_mmio_del, Reverse, §ion, + MEMORY_LISTENER_CALL(as, coalesced_io_del, Reverse, §ion, int128_get64(fr->addr.start), int128_get64(fr->addr.size)); QTAILQ_FOREACH(cmr, &mr->coalesced, link) { @@ -2140,7 +2140,7 @@ static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpa continue; } tmp = addrrange_intersection(tmp, fr->addr); - MEMORY_LISTENER_CALL(as, coalesced_mmio_add, Forward, §ion, + MEMORY_LISTENER_CALL(as, coalesced_io_add, Forward, §ion, int128_get64(tmp.start), int128_get64(tmp.size)); } diff --git a/qapi/misc.json b/qapi/misc.json index 3a68af9ca3..c85c6c8ca3 100644 --- a/qapi/misc.json +++ b/qapi/misc.json @@ -3070,7 +3070,8 @@ # Emitted when the guest changes the RTC time. # # @offset: offset between base RTC clock (as specified by -rtc base), and -# new RTC clock value +# new RTC clock value. Note that value will be different depending +# on clock chosen to drive RTC (specified by -rtc clock). # # Note: This event is rate-limited. # diff --git a/qemu-options.hx b/qemu-options.hx index 0f2f65ee40..214ce396f9 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3458,25 +3458,29 @@ HXCOMM Silently ignored for compatibility DEF("clock", HAS_ARG, QEMU_OPTION_clock, "", QEMU_ARCH_ALL) DEF("rtc", HAS_ARG, QEMU_OPTION_rtc, \ - "-rtc [base=utc|localtime|date][,clock=host|rt|vm][,driftfix=none|slew]\n" \ + "-rtc [base=utc|localtime|][,clock=host|rt|vm][,driftfix=none|slew]\n" \ " set the RTC base and clock, enable drift fix for clock ticks (x86 only)\n", QEMU_ARCH_ALL) STEXI -@item -rtc [base=utc|localtime|@var{date}][,clock=host|vm][,driftfix=none|slew] +@item -rtc [base=utc|localtime|@var{datetime}][,clock=host|rt|vm][,driftfix=none|slew] @findex -rtc Specify @option{base} as @code{utc} or @code{localtime} to let the RTC start at the current UTC or local time, respectively. @code{localtime} is required for correct date in -MS-DOS or Windows. To start at a specific point in time, provide @var{date} in the +MS-DOS or Windows. To start at a specific point in time, provide @var{datetime} in the format @code{2006-06-17T16:01:21} or @code{2006-06-17}. The default base is UTC. By default the RTC is driven by the host system time. This allows using of the RTC as accurate reference clock inside the guest, specifically if the host time is smoothly following an accurate external reference clock, e.g. via NTP. If you want to isolate the guest time from the host, you can set @option{clock} -to @code{rt} instead. To even prevent it from progressing during suspension, -you can set it to @code{vm}. +to @code{rt} instead, which provides a host monotonic clock if host support it. +To even prevent the RTC from progressing during suspension, you can set @option{clock} +to @code{vm} (virtual clock). @samp{clock=vm} is recommended especially in +icount mode in order to preserve determinism; however, note that in icount mode +the speed of the virtual clock is variable and can in general differ from the +host clock. Enable @option{driftfix} (i386 targets only) if you experience time drift problems, specifically with Windows' ACPI HAL. This option will try to figure out how diff --git a/replay/replay-events.c b/replay/replay-events.c index 0964a82838..d9a2d495b9 100644 --- a/replay/replay-events.c +++ b/replay/replay-events.c @@ -190,6 +190,7 @@ void replay_save_events(int checkpoint) { g_assert(replay_mutex_locked()); g_assert(checkpoint != CHECKPOINT_CLOCK_WARP_START); + g_assert(checkpoint != CHECKPOINT_CLOCK_VIRTUAL); while (!QTAILQ_EMPTY(&events_list)) { Event *event = QTAILQ_FIRST(&events_list); replay_save_event(event, checkpoint); diff --git a/replay/replay-internal.c b/replay/replay-internal.c index b077cb5fd5..1cea1d4dc9 100644 --- a/replay/replay-internal.c +++ b/replay/replay-internal.c @@ -217,20 +217,25 @@ void replay_mutex_unlock(void) } } +void replay_advance_current_step(uint64_t current_step) +{ + int diff = (int)(replay_get_current_step() - replay_state.current_step); + + /* Time can only go forward */ + assert(diff >= 0); + + if (diff > 0) { + replay_put_event(EVENT_INSTRUCTION); + replay_put_dword(diff); + replay_state.current_step += diff; + } +} + /*! Saves cached instructions. */ void replay_save_instructions(void) { if (replay_file && replay_mode == REPLAY_MODE_RECORD) { g_assert(replay_mutex_locked()); - int diff = (int)(replay_get_current_step() - replay_state.current_step); - - /* Time can only go forward */ - assert(diff >= 0); - - if (diff > 0) { - replay_put_event(EVENT_INSTRUCTION); - replay_put_dword(diff); - replay_state.current_step += diff; - } + replay_advance_current_step(replay_get_current_step()); } } diff --git a/replay/replay-internal.h b/replay/replay-internal.h index 9b0fd916a3..af6f4d55d4 100644 --- a/replay/replay-internal.h +++ b/replay/replay-internal.h @@ -122,6 +122,8 @@ void replay_finish_event(void); data_kind variable. */ void replay_fetch_data_kind(void); +/*! Advance replay_state.current_step to the specified value. */ +void replay_advance_current_step(uint64_t current_step); /*! Saves queued events (like instructions and sound). */ void replay_save_instructions(void); diff --git a/replay/replay-time.c b/replay/replay-time.c index 6a7565ec8d..0df1693337 100644 --- a/replay/replay-time.c +++ b/replay/replay-time.c @@ -15,13 +15,15 @@ #include "replay-internal.h" #include "qemu/error-report.h" -int64_t replay_save_clock(ReplayClockKind kind, int64_t clock) +int64_t replay_save_clock(ReplayClockKind kind, int64_t clock, int64_t raw_icount) { - if (replay_file) { g_assert(replay_mutex_locked()); - replay_save_instructions(); + /* Due to the caller's locking requirements we get the icount from it + * instead of using replay_save_instructions(). + */ + replay_advance_current_step(raw_icount); replay_put_event(EVENT_CLOCK + kind); replay_put_qword(clock); } diff --git a/replay/replay.c b/replay/replay.c index 379b51ab46..8b172b2d1b 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -214,7 +214,14 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint) /* This checkpoint belongs to several threads. Processing events from different threads is non-deterministic */ - if (checkpoint != CHECKPOINT_CLOCK_WARP_START) { + if (checkpoint != CHECKPOINT_CLOCK_WARP_START + /* FIXME: this is temporary fix, other checkpoints + may also be invoked from the different threads someday. + Asynchronous event processing should be refactored + to create additional replay event kind which is + nailed to the one of the threads and which processes + the event queue. */ + && checkpoint != CHECKPOINT_CLOCK_VIRTUAL) { replay_save_events(checkpoint); } res = true; diff --git a/slirp/ip6_icmp.c b/slirp/ip6_icmp.c index 3f41187cfe..cd1e0b9fe1 100644 --- a/slirp/ip6_icmp.c +++ b/slirp/ip6_icmp.c @@ -17,7 +17,7 @@ static void ra_timer_handler(void *opaque) { Slirp *slirp = opaque; timer_mod(slirp->ra_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_EXT) + NDP_Interval); + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NDP_Interval); ndp_send_ra(slirp); } @@ -27,10 +27,11 @@ void icmp6_init(Slirp *slirp) return; } - slirp->ra_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_EXT, - ra_timer_handler, slirp); + slirp->ra_timer = timer_new_full(NULL, QEMU_CLOCK_VIRTUAL, + SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL, + ra_timer_handler, slirp); timer_mod(slirp->ra_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_EXT) + NDP_Interval); + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NDP_Interval); } void icmp6_cleanup(Slirp *slirp) diff --git a/stubs/cpu-get-icount.c b/stubs/cpu-get-icount.c index 0b7239d721..35f0c1e24c 100644 --- a/stubs/cpu-get-icount.c +++ b/stubs/cpu-get-icount.c @@ -11,6 +11,11 @@ int64_t cpu_get_icount(void) abort(); } +int64_t cpu_get_icount_raw(void) +{ + abort(); +} + void qemu_timer_notify_cb(void *opaque, QEMUClockType type) { qemu_notify_event(); diff --git a/stubs/replay.c b/stubs/replay.c index 04279abb2c..4ac607895d 100644 --- a/stubs/replay.c +++ b/stubs/replay.c @@ -4,7 +4,7 @@ ReplayMode replay_mode; -int64_t replay_save_clock(unsigned int kind, int64_t clock) +int64_t replay_save_clock(unsigned int kind, int64_t clock, int64_t raw_icount) { abort(); return 0; diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs index 04678f5503..32bf966300 100644 --- a/target/i386/Makefile.objs +++ b/target/i386/Makefile.objs @@ -3,17 +3,20 @@ obj-$(CONFIG_TCG) += translate.o obj-$(CONFIG_TCG) += bpt_helper.o cc_helper.o excp_helper.o fpu_helper.o obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o mpx_helper.o obj-$(CONFIG_TCG) += seg_helper.o smm_helper.o svm_helper.o -obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o -obj-$(CONFIG_KVM) += kvm.o hyperv.o -obj-$(CONFIG_SEV) += sev.o +ifeq ($(CONFIG_SOFTMMU),y) +obj-y += machine.o arch_memory_mapping.o arch_dump.o monitor.o +obj-$(CONFIG_KVM) += kvm.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o -obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o -# HAX support -ifdef CONFIG_WIN32 +obj-$(CONFIG_HYPERV) += hyperv.o +obj-$(call lnot,$(CONFIG_HYPERV)) += hyperv-stub.o +ifeq ($(CONFIG_WIN32),y) obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-windows.o endif -ifdef CONFIG_DARWIN +ifeq ($(CONFIG_DARWIN),y) obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-darwin.o obj-$(CONFIG_HVF) += hvf/ endif obj-$(CONFIG_WHPX) += whpx-all.o +endif +obj-$(CONFIG_SEV) += sev.o +obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c88876dfe3..f0d9f7cf49 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5564,6 +5564,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), + DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), @@ -5606,6 +5607,8 @@ static Property x86_cpu_properties[] = { * to the specific Windows version being used." */ DEFINE_PROP_INT32("x-hv-max-vps", X86CPU, hv_max_vps, -1), + DEFINE_PROP_BOOL("x-hv-synic-kvm-only", X86CPU, hyperv_synic_kvm_only, + false), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 730c06f80a..663f3a5e67 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1378,10 +1378,12 @@ struct X86CPU { bool hyperv_vpindex; bool hyperv_runtime; bool hyperv_synic; + bool hyperv_synic_kvm_only; bool hyperv_stimer; bool hyperv_frequencies; bool hyperv_reenlightenment; bool hyperv_tlbflush; + bool hyperv_ipi; bool check_cpuid; bool enforce_cpuid; bool expose_kvm; diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 9f52bc413a..e193022c03 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -73,7 +73,6 @@ #include "target/i386/cpu.h" HVFState *hvf_state; -int hvf_disabled = 1; static void assert_hvf_ok(hv_return_t ret) { @@ -604,11 +603,6 @@ int hvf_init_vcpu(CPUState *cpu) return 0; } -void hvf_disable(int shouldDisable) -{ - hvf_disabled = shouldDisable; -} - static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -934,7 +928,7 @@ int hvf_vcpu_exec(CPUState *cpu) return ret; } -static bool hvf_allowed; +bool hvf_allowed; static int hvf_accel_init(MachineState *ms) { @@ -942,7 +936,6 @@ static int hvf_accel_init(MachineState *ms) hv_return_t ret; HVFState *s; - hvf_disable(0); ret = hv_vm_create(HV_VM_DEFAULT); assert_hvf_ok(ret); diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c index 2d7540fe7c..2e33b69541 100644 --- a/target/i386/hvf/x86_decode.c +++ b/target/i386/hvf/x86_decode.c @@ -113,7 +113,8 @@ static void decode_modrm_reg(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = decode->modrm.reg; - op->ptr = get_reg_ref(env, op->reg, decode->rex.r, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r, + decode->operand_size); } static void decode_rax(CPUX86State *env, struct x86_decode *decode, @@ -121,7 +122,8 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_EAX; - op->ptr = get_reg_ref(env, op->reg, 0, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, 0, + decode->operand_size); } static inline void decode_immediate(CPUX86State *env, struct x86_decode *decode, @@ -263,16 +265,16 @@ static void decode_incgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x40; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_decgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x48; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_incgroup2(CPUX86State *env, struct x86_decode *decode) @@ -288,16 +290,16 @@ static void decode_pushgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x50; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_popgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x58; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_jxx(CPUX86State *env, struct x86_decode *decode) @@ -378,16 +380,16 @@ static void decode_xchgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x90; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_movgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -402,8 +404,8 @@ static void decode_movgroup8(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb0; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -412,7 +414,8 @@ static void decode_rcx(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_ECX; - op->ptr = get_reg_ref(env, op->reg, decode->rex.b, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b, + decode->operand_size); } struct decode_tbl { @@ -639,8 +642,8 @@ static void decode_bswap(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[1] - 0xc8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_d9_4(CPUX86State *env, struct x86_decode *decode) @@ -1686,7 +1689,8 @@ calc_addr: } } -target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended, + int size) { target_ulong ptr = 0; int which = 0; @@ -1698,7 +1702,7 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) switch (size) { case 1: - if (is_extended || reg < 4) { + if (is_extended || reg < 4 || rex) { which = 1; ptr = (target_ulong)&RL(env, reg); } else { @@ -1714,10 +1718,11 @@ target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) return ptr; } -target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size) +target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended, + int size) { target_ulong val = 0; - memcpy(&val, (void *)get_reg_ref(env, reg, is_extended, size), size); + memcpy(&val, (void *)get_reg_ref(env, reg, rex, is_extended, size), size); return val; } @@ -1739,7 +1744,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode, if (base_reg == R_ESP || base_reg == R_EBP) { *sel = R_SS; } - base = get_reg_val(env, decode->sib.base, decode->rex.b, addr_size); + base = get_reg_val(env, decode->sib.base, decode->rex.rex, + decode->rex.b, addr_size); } if (decode->rex.x) { @@ -1747,7 +1753,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode, } if (index_reg != R_ESP) { - scaled_index = get_reg_val(env, index_reg, decode->rex.x, addr_size) << + scaled_index = get_reg_val(env, index_reg, decode->rex.rex, + decode->rex.x, addr_size) << decode->sib.scale; } return base + scaled_index; @@ -1776,7 +1783,8 @@ void calc_modrm_operand32(CPUX86State *env, struct x86_decode *decode, if (decode->modrm.rm == R_EBP || decode->modrm.rm == R_ESP) { seg = R_SS; } - ptr += get_reg_val(env, decode->modrm.rm, decode->rex.b, addr_size); + ptr += get_reg_val(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, addr_size); } if (X86_DECODE_CMD_LEA == decode->cmd) { @@ -1805,7 +1813,8 @@ void calc_modrm_operand64(CPUX86State *env, struct x86_decode *decode, } else if (0 == mod && 5 == rm) { ptr = RIP(env) + decode->len + (int32_t) offset; } else { - ptr = get_reg_val(env, src, decode->rex.b, 8) + (int64_t) offset; + ptr = get_reg_val(env, src, decode->rex.rex, decode->rex.b, 8) + + (int64_t) offset; } if (X86_DECODE_CMD_LEA == decode->cmd) { @@ -1822,8 +1831,8 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, if (3 == decode->modrm.mod) { op->reg = decode->modrm.reg; op->type = X86_VAR_REG; - op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.b, - decode->operand_size); + op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, decode->operand_size); return; } diff --git a/target/i386/hvf/x86_decode.h b/target/i386/hvf/x86_decode.h index 5ab6f31fa5..ef4bcab310 100644 --- a/target/i386/hvf/x86_decode.h +++ b/target/i386/hvf/x86_decode.h @@ -303,8 +303,10 @@ uint64_t sign(uint64_t val, int size); uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode); -target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size); -target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size); +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex, int is_extended, + int size); +target_ulong get_reg_val(CPUX86State *env, int reg, int rex, int is_extended, + int size); void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, struct x86_decode_op *op); target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode, diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index d6d5a79293..8c572cd7c2 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -1,7 +1,7 @@ /* - * Definitions for Hyper-V guest/hypervisor interaction + * Definitions for Hyper-V guest/hypervisor interaction - x86-specific part * - * Copyright (C) 2017 Parallels International GmbH + * Copyright (c) 2017-2018 Virtuozzo International GmbH. * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. @@ -10,7 +10,7 @@ #ifndef TARGET_I386_HYPERV_PROTO_H #define TARGET_I386_HYPERV_PROTO_H -#include "qemu/bitmap.h" +#include "hw/hyperv/hyperv-proto.h" #define HV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 #define HV_CPUID_INTERFACE 0x40000001 @@ -58,6 +58,7 @@ #define HV_APIC_ACCESS_RECOMMENDED (1u << 3) #define HV_SYSTEM_RESET_RECOMMENDED (1u << 4) #define HV_RELAXED_TIMING_RECOMMENDED (1u << 5) +#define HV_CLUSTER_IPI_RECOMMENDED (1u << 10) #define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) /* @@ -137,25 +138,6 @@ #define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 #define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 -/* - * Hypercall status code - */ -#define HV_STATUS_SUCCESS 0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 -#define HV_STATUS_INVALID_ALIGNMENT 4 -#define HV_STATUS_INVALID_PARAMETER 5 -#define HV_STATUS_INSUFFICIENT_MEMORY 11 -#define HV_STATUS_INVALID_CONNECTION_ID 18 -#define HV_STATUS_INSUFFICIENT_BUFFERS 19 - -/* - * Hypercall numbers - */ -#define HV_POST_MESSAGE 0x005c -#define HV_SIGNAL_EVENT 0x005d -#define HV_HYPERCALL_FAST (1u << 16) - /* * Hypercall MSR bits */ @@ -165,7 +147,6 @@ * Synthetic interrupt controller definitions */ #define HV_SYNIC_VERSION 1 -#define HV_SINT_COUNT 16 #define HV_SYNIC_ENABLE (1u << 0) #define HV_SIMP_ENABLE (1u << 0) #define HV_SIEFP_ENABLE (1u << 0) @@ -175,94 +156,5 @@ #define HV_STIMER_COUNT 4 -/* - * Message size - */ -#define HV_MESSAGE_PAYLOAD_SIZE 240 - -/* - * Message types - */ -#define HV_MESSAGE_NONE 0x00000000 -#define HV_MESSAGE_VMBUS 0x00000001 -#define HV_MESSAGE_UNMAPPED_GPA 0x80000000 -#define HV_MESSAGE_GPA_INTERCEPT 0x80000001 -#define HV_MESSAGE_TIMER_EXPIRED 0x80000010 -#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE 0x80000020 -#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION 0x80000021 -#define HV_MESSAGE_UNSUPPORTED_FEATURE 0x80000022 -#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE 0x80000040 -#define HV_MESSAGE_X64_IOPORT_INTERCEPT 0x80010000 -#define HV_MESSAGE_X64_MSR_INTERCEPT 0x80010001 -#define HV_MESSAGE_X64_CPUID_INTERCEPT 0x80010002 -#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT 0x80010003 -#define HV_MESSAGE_X64_APIC_EOI 0x80010004 -#define HV_MESSAGE_X64_LEGACY_FP_ERROR 0x80010005 - -/* - * Message flags - */ -#define HV_MESSAGE_FLAG_PENDING 0x1 - -/* - * Event flags number per SINT - */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) - -/* - * Connection id valid bits - */ -#define HV_CONNECTION_ID_MASK 0x00ffffff - -/* - * Input structure for POST_MESSAGE hypercall - */ -struct hyperv_post_message_input { - uint32_t connection_id; - uint32_t _reserved; - uint32_t message_type; - uint32_t payload_size; - uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; -}; - -/* - * Input structure for SIGNAL_EVENT hypercall - */ -struct hyperv_signal_event_input { - uint32_t connection_id; - uint16_t flag_number; - uint16_t _reserved_zero; -}; - -/* - * SynIC message structures - */ -struct hyperv_message_header { - uint32_t message_type; - uint8_t payload_size; - uint8_t message_flags; /* HV_MESSAGE_FLAG_XX */ - uint8_t _reserved[2]; - uint64_t sender; -}; - -struct hyperv_message { - struct hyperv_message_header header; - uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; -}; - -struct hyperv_message_page { - struct hyperv_message slot[HV_SINT_COUNT]; -}; - -/* - * SynIC event flags structures - */ -struct hyperv_event_flags { - DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT); -}; - -struct hyperv_event_flags_page { - struct hyperv_event_flags slot[HV_SINT_COUNT]; -}; #endif diff --git a/target/i386/hyperv-stub.c b/target/i386/hyperv-stub.c new file mode 100644 index 0000000000..fe548cbae2 --- /dev/null +++ b/target/i386/hyperv-stub.c @@ -0,0 +1,48 @@ +/* + * Stubs for CONFIG_HYPERV=n + * + * Copyright (c) 2015-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hyperv.h" + +#ifdef CONFIG_KVM +int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) +{ + switch (exit->type) { + case KVM_EXIT_HYPERV_SYNIC: + if (!cpu->hyperv_synic) { + return -1; + } + + /* + * Tracking the changes in the MSRs is unnecessary as there are no + * users for them beside save/load, which is handled nicely by the + * generic MSR save/load code + */ + return 0; + case KVM_EXIT_HYPERV_HCALL: + exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE; + return 0; + default: + return -1; + } +} +#endif + +int hyperv_x86_synic_add(X86CPU *cpu) +{ + return -ENOSYS; +} + +void hyperv_x86_synic_reset(X86CPU *cpu) +{ +} + +void hyperv_x86_synic_update(X86CPU *cpu) +{ +} diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c index 3065d765ed..b264a28620 100644 --- a/target/i386/hyperv.c +++ b/target/i386/hyperv.c @@ -14,16 +14,36 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "hyperv.h" +#include "hw/hyperv/hyperv.h" #include "hyperv-proto.h" -uint32_t hyperv_vp_index(X86CPU *cpu) +int hyperv_x86_synic_add(X86CPU *cpu) { - return CPU(cpu)->cpu_index; + hyperv_synic_add(CPU(cpu)); + return 0; } -X86CPU *hyperv_find_vcpu(uint32_t vp_index) +void hyperv_x86_synic_reset(X86CPU *cpu) { - return X86_CPU(qemu_get_cpu(vp_index)); + hyperv_synic_reset(CPU(cpu)); +} + +void hyperv_x86_synic_update(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + bool enable = env->msr_hv_synic_control & HV_SYNIC_ENABLE; + hwaddr msg_page_addr = (env->msr_hv_synic_msg_page & HV_SIMP_ENABLE) ? + (env->msr_hv_synic_msg_page & TARGET_PAGE_MASK) : 0; + hwaddr event_page_addr = (env->msr_hv_synic_evt_page & HV_SIEFP_ENABLE) ? + (env->msr_hv_synic_evt_page & TARGET_PAGE_MASK) : 0; + hyperv_synic_update(CPU(cpu), enable, msg_page_addr, event_page_addr); +} + +static void async_synic_update(CPUState *cs, run_on_cpu_data data) +{ + qemu_mutex_lock_iothread(); + hyperv_x86_synic_update(X86_CPU(cs)); + qemu_mutex_unlock_iothread(); } int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) @@ -36,11 +56,6 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) return -1; } - /* - * For now just track changes in SynIC control and msg/evt pages msr's. - * When SynIC messaging/events processing will be added in future - * here we will do messages queues flushing and pages remapping. - */ switch (exit->u.synic.msr) { case HV_X64_MSR_SCONTROL: env->msr_hv_synic_control = exit->u.synic.control; @@ -54,98 +69,33 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) default: return -1; } + + /* + * this will run in this cpu thread before it returns to KVM, but in a + * safe environment (i.e. when all cpus are quiescent) -- this is + * necessary because memory hierarchy is being changed + */ + async_safe_run_on_cpu(CPU(cpu), async_synic_update, RUN_ON_CPU_NULL); + return 0; case KVM_EXIT_HYPERV_HCALL: { - uint16_t code; + uint16_t code = exit->u.hcall.input & 0xffff; + bool fast = exit->u.hcall.input & HV_HYPERCALL_FAST; + uint64_t param = exit->u.hcall.params[0]; - code = exit->u.hcall.input & 0xffff; switch (code) { case HV_POST_MESSAGE: + exit->u.hcall.result = hyperv_hcall_post_message(param, fast); + break; case HV_SIGNAL_EVENT: + exit->u.hcall.result = hyperv_hcall_signal_event(param, fast); + break; default: exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE; - return 0; } + return 0; } default: return -1; } } - -static void kvm_hv_sint_ack_handler(EventNotifier *notifier) -{ - HvSintRoute *sint_route = container_of(notifier, HvSintRoute, - sint_ack_notifier); - event_notifier_test_and_clear(notifier); - if (sint_route->sint_ack_clb) { - sint_route->sint_ack_clb(sint_route); - } -} - -HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb) -{ - HvSintRoute *sint_route; - int r, gsi; - - sint_route = g_malloc0(sizeof(*sint_route)); - r = event_notifier_init(&sint_route->sint_set_notifier, false); - if (r) { - goto err; - } - - r = event_notifier_init(&sint_route->sint_ack_notifier, false); - if (r) { - goto err_sint_set_notifier; - } - - event_notifier_set_handler(&sint_route->sint_ack_notifier, - kvm_hv_sint_ack_handler); - - gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); - if (gsi < 0) { - goto err_gsi; - } - - r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, - &sint_route->sint_set_notifier, - &sint_route->sint_ack_notifier, gsi); - if (r) { - goto err_irqfd; - } - sint_route->gsi = gsi; - sint_route->sint_ack_clb = sint_ack_clb; - sint_route->vp_index = vp_index; - sint_route->sint = sint; - - return sint_route; - -err_irqfd: - kvm_irqchip_release_virq(kvm_state, gsi); -err_gsi: - event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); - event_notifier_cleanup(&sint_route->sint_ack_notifier); -err_sint_set_notifier: - event_notifier_cleanup(&sint_route->sint_set_notifier); -err: - g_free(sint_route); - - return NULL; -} - -void kvm_hv_sint_route_destroy(HvSintRoute *sint_route) -{ - kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, - &sint_route->sint_set_notifier, - sint_route->gsi); - kvm_irqchip_release_virq(kvm_state, sint_route->gsi); - event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); - event_notifier_cleanup(&sint_route->sint_ack_notifier); - event_notifier_cleanup(&sint_route->sint_set_notifier); - g_free(sint_route); -} - -int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route) -{ - return event_notifier_set(&sint_route->sint_set_notifier); -} diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h index 00c9b454bb..67543296c3 100644 --- a/target/i386/hyperv.h +++ b/target/i386/hyperv.h @@ -16,30 +16,14 @@ #include "cpu.h" #include "sysemu/kvm.h" -#include "qemu/event_notifier.h" - -typedef struct HvSintRoute HvSintRoute; -typedef void (*HvSintAckClb)(HvSintRoute *sint_route); - -struct HvSintRoute { - uint32_t sint; - uint32_t vp_index; - int gsi; - EventNotifier sint_set_notifier; - EventNotifier sint_ack_notifier; - HvSintAckClb sint_ack_clb; -}; +#include "hw/hyperv/hyperv.h" +#ifdef CONFIG_KVM int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit); +#endif -HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb); - -void kvm_hv_sint_route_destroy(HvSintRoute *sint_route); - -int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route); - -uint32_t hyperv_vp_index(X86CPU *cpu); -X86CPU *hyperv_find_vcpu(uint32_t vp_index); +int hyperv_x86_synic_add(X86CPU *cpu); +void hyperv_x86_synic_reset(X86CPU *cpu); +void hyperv_x86_synic_update(X86CPU *cpu); #endif diff --git a/target/i386/kvm.c b/target/i386/kvm.c index dc4047b02f..115d8b4c14 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -608,7 +608,8 @@ static bool hyperv_enabled(X86CPU *cpu) cpu->hyperv_synic || cpu->hyperv_stimer || cpu->hyperv_reenlightenment || - cpu->hyperv_tlbflush); + cpu->hyperv_tlbflush || + cpu->hyperv_ipi); } static int kvm_arch_set_tsc_khz(CPUState *cs) @@ -733,9 +734,20 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_VP_RUNTIME_AVAILABLE; } if (cpu->hyperv_synic) { - if (!has_msr_hv_synic || - kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_SYNIC, 0)) { - fprintf(stderr, "Hyper-V SynIC is not supported by kernel\n"); + unsigned int cap = KVM_CAP_HYPERV_SYNIC; + if (!cpu->hyperv_synic_kvm_only) { + if (!cpu->hyperv_vpindex) { + fprintf(stderr, "Hyper-V SynIC " + "(requested by 'hv-synic' cpu flag) " + "requires Hyper-V VP_INDEX ('hv-vpindex')\n"); + return -ENOSYS; + } + cap = KVM_CAP_HYPERV_SYNIC2; + } + + if (!has_msr_hv_synic || !kvm_check_extension(cs->kvm_state, cap)) { + fprintf(stderr, "Hyper-V SynIC (requested by 'hv-synic' cpu flag) " + "is not supported by kernel\n"); return -ENOSYS; } @@ -753,12 +765,14 @@ static int hyperv_handle_properties(CPUState *cs) static int hyperv_init_vcpu(X86CPU *cpu) { + CPUState *cs = CPU(cpu); + int ret; + if (cpu->hyperv_vpindex && !hv_vpindex_settable) { /* * the kernel doesn't support setting vp_index; assert that its value * is in sync */ - int ret; struct { struct kvm_msrs info; struct kvm_msr_entry entries[1]; @@ -767,18 +781,38 @@ static int hyperv_init_vcpu(X86CPU *cpu) .entries[0].index = HV_X64_MSR_VP_INDEX, }; - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); + ret = kvm_vcpu_ioctl(cs, KVM_GET_MSRS, &msr_data); if (ret < 0) { return ret; } assert(ret == 1); - if (msr_data.entries[0].data != hyperv_vp_index(cpu)) { + if (msr_data.entries[0].data != hyperv_vp_index(CPU(cpu))) { error_report("kernel's vp_index != QEMU's vp_index"); return -ENXIO; } } + if (cpu->hyperv_synic) { + uint32_t synic_cap = cpu->hyperv_synic_kvm_only ? + KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2; + ret = kvm_vcpu_enable_cap(cs, synic_cap, 0); + if (ret < 0) { + error_report("failed to turn on HyperV SynIC in KVM: %s", + strerror(-ret)); + return ret; + } + + if (!cpu->hyperv_synic_kvm_only) { + ret = hyperv_x86_synic_add(cpu); + if (ret < 0) { + error_report("failed to create HyperV SynIC: %s", + strerror(-ret)); + return ret; + } + } + } + return 0; } @@ -888,6 +922,17 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; } + if (cpu->hyperv_ipi) { + if (kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_SEND_IPI) <= 0) { + fprintf(stderr, "Hyper-V IPI send support " + "(requested by 'hv-ipi' cpu flag) " + " is not supported by kernel\n"); + return -ENOSYS; + } + c->eax |= HV_CLUSTER_IPI_RECOMMENDED; + c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; + } c->ebx = cpu->hyperv_spinlock_attempts; @@ -1153,7 +1198,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (local_err) { error_report_err(local_err); error_free(invtsc_mig_blocker); - goto fail; + return r; } /* for savevm */ vmstate_x86_cpu.unmigratable = 1; @@ -1226,6 +1271,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) { env->msr_hv_synic_sint[i] = HV_SINT_MASKED; } + + hyperv_x86_synic_reset(cpu); } } @@ -1937,7 +1984,8 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime); } if (cpu->hyperv_vpindex && hv_vpindex_settable) { - kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, hyperv_vp_index(cpu)); + kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, + hyperv_vp_index(CPU(cpu))); } if (cpu->hyperv_synic) { int j; @@ -2686,7 +2734,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.exception.nr = env->exception_injected; events.exception.has_error_code = env->has_error_code; events.exception.error_code = env->error_code; - events.exception.pad = 0; events.interrupt.injected = (env->interrupt_injected >= 0); events.interrupt.nr = env->interrupt_injected; @@ -2695,7 +2742,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.nmi.injected = env->nmi_injected; events.nmi.pending = env->nmi_pending; events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK); - events.nmi.pad = 0; events.sipi_vector = env->sipi_vector; events.flags = 0; diff --git a/target/i386/machine.c b/target/i386/machine.c index 084c2c73a8..225b5d433b 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -7,6 +7,7 @@ #include "hw/i386/pc.h" #include "hw/isa/isa.h" #include "migration/cpu.h" +#include "hyperv.h" #include "sysemu/kvm.h" @@ -672,11 +673,19 @@ static bool hyperv_synic_enable_needed(void *opaque) return false; } +static int hyperv_synic_post_load(void *opaque, int version_id) +{ + X86CPU *cpu = opaque; + hyperv_x86_synic_update(cpu); + return 0; +} + static const VMStateDescription vmstate_msr_hyperv_synic = { .name = "cpu/msr_hyperv_synic", .version_id = 1, .minimum_version_id = 1, .needed = hyperv_synic_enable_needed, + .post_load = hyperv_synic_post_load, .fields = (VMStateField[]) { VMSTATE_UINT64(env.msr_hv_synic_control, X86CPU), VMSTATE_UINT64(env.msr_hv_synic_evt_page, X86CPU), diff --git a/tests/ptimer-test-stubs.c b/tests/ptimer-test-stubs.c index ca5cc3b13b..54b3fd26f6 100644 --- a/tests/ptimer-test-stubs.c +++ b/tests/ptimer-test-stubs.c @@ -34,14 +34,19 @@ int64_t ptimer_test_time_ns; int use_icount = 1; bool qtest_allowed; -void timer_init_tl(QEMUTimer *ts, - QEMUTimerList *timer_list, int scale, - QEMUTimerCB *cb, void *opaque) +void timer_init_full(QEMUTimer *ts, + QEMUTimerListGroup *timer_list_group, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) { - ts->timer_list = timer_list; + if (!timer_list_group) { + timer_list_group = &main_loop_tlg; + } + ts->timer_list = timer_list_group->tl[type]; ts->cb = cb; ts->opaque = opaque; ts->scale = scale; + ts->attributes = attributes; ts->expire_time = -1; } diff --git a/ui/input.c b/ui/input.c index dd7f6d7f21..7c9a4109c4 100644 --- a/ui/input.c +++ b/ui/input.c @@ -271,7 +271,7 @@ static void qemu_input_queue_process(void *opaque) item = QTAILQ_FIRST(queue); switch (item->type) { case QEMU_INPUT_QUEUE_DELAY: - timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_EXT) + timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + item->delay_ms); return; case QEMU_INPUT_QUEUE_EVENT: @@ -301,7 +301,7 @@ static void qemu_input_queue_delay(struct QemuInputEventQueueHead *queue, queue_count++; if (start_timer) { - timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_EXT) + timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + item->delay_ms); } } @@ -448,8 +448,9 @@ void qemu_input_event_send_key_delay(uint32_t delay_ms) } if (!kbd_timer) { - kbd_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_EXT, - qemu_input_queue_process, &kbd_queue); + kbd_timer = timer_new_full(NULL, QEMU_CLOCK_VIRTUAL, + SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL, + qemu_input_queue_process, &kbd_queue); } if (queue_count < queue_limit) { qemu_input_queue_delay(&kbd_queue, kbd_timer, diff --git a/util/qemu-timer.c b/util/qemu-timer.c index eb60d8f73a..1cc1b2f2c3 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -339,14 +339,19 @@ int qemu_poll_ns(GPollFD *fds, guint nfds, int64_t timeout) } -void timer_init_tl(QEMUTimer *ts, - QEMUTimerList *timer_list, int scale, - QEMUTimerCB *cb, void *opaque) +void timer_init_full(QEMUTimer *ts, + QEMUTimerListGroup *timer_list_group, QEMUClockType type, + int scale, int attributes, + QEMUTimerCB *cb, void *opaque) { - ts->timer_list = timer_list; + if (!timer_list_group) { + timer_list_group = &main_loop_tlg; + } + ts->timer_list = timer_list_group->tl[type]; ts->cb = cb; ts->opaque = opaque; ts->scale = scale; + ts->attributes = attributes; ts->expire_time = -1; } @@ -484,6 +489,7 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) bool progress = false; QEMUTimerCB *cb; void *opaque; + bool need_replay_checkpoint = false; if (!atomic_read(&timer_list->active_timers)) { return false; @@ -496,12 +502,18 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) switch (timer_list->clock->type) { case QEMU_CLOCK_REALTIME: - case QEMU_CLOCK_VIRTUAL_EXT: break; default: case QEMU_CLOCK_VIRTUAL: - if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) { - goto out; + if (replay_mode != REPLAY_MODE_NONE) { + /* Checkpoint for virtual clock is redundant in cases where + * it's being triggered with only non-EXTERNAL timers, because + * these timers don't change guest state directly. + * Since it has conditional dependence on specific timers, it is + * subject to race conditions and requires special handling. + * See below. + */ + need_replay_checkpoint = true; } break; case QEMU_CLOCK_HOST: @@ -516,14 +528,39 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) break; } + /* + * Extract expired timers from active timers list and and process them. + * + * In rr mode we need "filtered" checkpointing for virtual clock. The + * checkpoint must be recorded/replayed before processing any non-EXTERNAL timer, + * and that must only be done once since the clock value stays the same. Because + * non-EXTERNAL timers may appear in the timers list while it being processed, + * the checkpoint can be issued at a time until no timers are left and we are + * done". + */ current_time = qemu_clock_get_ns(timer_list->clock->type); - for(;;) { - qemu_mutex_lock(&timer_list->active_timers_lock); - ts = timer_list->active_timers; + qemu_mutex_lock(&timer_list->active_timers_lock); + while ((ts = timer_list->active_timers)) { if (!timer_expired_ns(ts, current_time)) { - qemu_mutex_unlock(&timer_list->active_timers_lock); + /* No expired timers left. The checkpoint can be skipped + * if no timers fired or they were all external. + */ break; } + if (need_replay_checkpoint + && !(ts->attributes & QEMU_TIMER_ATTR_EXTERNAL)) { + /* once we got here, checkpoint clock only once */ + need_replay_checkpoint = false; + qemu_mutex_unlock(&timer_list->active_timers_lock); + if (!replay_checkpoint(CHECKPOINT_CLOCK_VIRTUAL)) { + goto out; + } + qemu_mutex_lock(&timer_list->active_timers_lock); + /* The lock was released; start over again in case the list was + * modified. + */ + continue; + } /* remove timer from the list before calling the callback */ timer_list->active_timers = ts->next; @@ -531,12 +568,15 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) ts->expire_time = -1; cb = ts->cb; opaque = ts->opaque; - qemu_mutex_unlock(&timer_list->active_timers_lock); /* run the callback (the timer list can be modified) */ + qemu_mutex_unlock(&timer_list->active_timers_lock); cb(opaque); + qemu_mutex_lock(&timer_list->active_timers_lock); + progress = true; } + qemu_mutex_unlock(&timer_list->active_timers_lock); out: qemu_event_set(&timer_list->timers_done_ev); @@ -598,7 +638,6 @@ int64_t qemu_clock_get_ns(QEMUClockType type) return get_clock(); default: case QEMU_CLOCK_VIRTUAL: - case QEMU_CLOCK_VIRTUAL_EXT: if (use_icount) { return cpu_get_icount(); } else { diff --git a/vl.c b/vl.c index ac3ed17de4..9c345223a0 100644 --- a/vl.c +++ b/vl.c @@ -147,8 +147,15 @@ bool enable_cpu_pm = false; int nb_nics; NICInfo nd_table[MAX_NICS]; int autostart; -static int rtc_utc = 1; -static int rtc_date_offset = -1; /* -1 means no change */ +static enum { + RTC_BASE_UTC, + RTC_BASE_LOCALTIME, + RTC_BASE_DATETIME, +} rtc_base_type = RTC_BASE_UTC; +static time_t rtc_ref_start_datetime; +static int rtc_realtime_clock_offset; /* used only with QEMU_CLOCK_REALTIME */ +static int rtc_host_datetime_offset = -1; /* valid & used only with + RTC_BASE_DATETIME */ QEMUClockType rtc_clock; int vga_interface_type = VGA_NONE; static DisplayOptions dpy; @@ -242,6 +249,7 @@ static struct { static QemuOptsList qemu_rtc_opts = { .name = "rtc", .head = QTAILQ_HEAD_INITIALIZER(qemu_rtc_opts.head), + .merge_lists = true, .desc = { { .name = "base", @@ -780,28 +788,42 @@ void qemu_system_vmstop_request(RunState state) } /***********************************************************/ -/* real time host monotonic timer */ - -static time_t qemu_time(void) +/* RTC reference time/date access */ +static time_t qemu_ref_timedate(QEMUClockType clock) { - return qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000; + time_t value = qemu_clock_get_ms(clock) / 1000; + switch (clock) { + case QEMU_CLOCK_REALTIME: + value -= rtc_realtime_clock_offset; + /* no break */ + case QEMU_CLOCK_VIRTUAL: + value += rtc_ref_start_datetime; + break; + case QEMU_CLOCK_HOST: + if (rtc_base_type == RTC_BASE_DATETIME) { + value -= rtc_host_datetime_offset; + } + break; + default: + assert(0); + } + return value; } -/***********************************************************/ -/* host time/date access */ void qemu_get_timedate(struct tm *tm, int offset) { - time_t ti = qemu_time(); + time_t ti = qemu_ref_timedate(rtc_clock); ti += offset; - if (rtc_date_offset == -1) { - if (rtc_utc) - gmtime_r(&ti, tm); - else - localtime_r(&ti, tm); - } else { - ti -= rtc_date_offset; + + switch (rtc_base_type) { + case RTC_BASE_DATETIME: + case RTC_BASE_UTC: gmtime_r(&ti, tm); + break; + case RTC_BASE_LOCALTIME: + localtime_r(&ti, tm); + break; } } @@ -809,23 +831,28 @@ int qemu_timedate_diff(struct tm *tm) { time_t seconds; - if (rtc_date_offset == -1) - if (rtc_utc) - seconds = mktimegm(tm); - else { - struct tm tmp = *tm; - tmp.tm_isdst = -1; /* use timezone to figure it out */ - seconds = mktime(&tmp); - } - else - seconds = mktimegm(tm) + rtc_date_offset; + switch (rtc_base_type) { + case RTC_BASE_DATETIME: + case RTC_BASE_UTC: + seconds = mktimegm(tm); + break; + case RTC_BASE_LOCALTIME: + { + struct tm tmp = *tm; + tmp.tm_isdst = -1; /* use timezone to figure it out */ + seconds = mktime(&tmp); + break; + } + default: + abort(); + } - return seconds - qemu_time(); + return seconds - qemu_ref_timedate(QEMU_CLOCK_HOST); } -static void configure_rtc_date_offset(const char *startdate) +static void configure_rtc_base_datetime(const char *startdate) { - time_t rtc_start_date; + time_t rtc_start_datetime; struct tm tm; if (sscanf(startdate, "%d-%d-%dT%d:%d:%d", &tm.tm_year, &tm.tm_mon, @@ -841,33 +868,40 @@ static void configure_rtc_date_offset(const char *startdate) } tm.tm_year -= 1900; tm.tm_mon--; - rtc_start_date = mktimegm(&tm); - if (rtc_start_date == -1) { + rtc_start_datetime = mktimegm(&tm); + if (rtc_start_datetime == -1) { date_fail: - error_report("invalid date format"); + error_report("invalid datetime format"); error_printf("valid formats: " "'2006-06-17T16:01:21' or '2006-06-17'\n"); exit(1); } - rtc_date_offset = qemu_time() - rtc_start_date; + rtc_host_datetime_offset = rtc_ref_start_datetime - rtc_start_datetime; + rtc_ref_start_datetime = rtc_start_datetime; } static void configure_rtc(QemuOpts *opts) { const char *value; + /* Set defaults */ + rtc_clock = QEMU_CLOCK_HOST; + rtc_ref_start_datetime = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000; + rtc_realtime_clock_offset = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; + value = qemu_opt_get(opts, "base"); if (value) { if (!strcmp(value, "utc")) { - rtc_utc = 1; + rtc_base_type = RTC_BASE_UTC; } else if (!strcmp(value, "localtime")) { Error *blocker = NULL; - rtc_utc = 0; + rtc_base_type = RTC_BASE_LOCALTIME; error_setg(&blocker, QERR_REPLAY_NOT_SUPPORTED, "-rtc base=localtime"); replay_add_blocker(blocker); } else { - configure_rtc_date_offset(value); + rtc_base_type = RTC_BASE_DATETIME; + configure_rtc_base_datetime(value); } } value = qemu_opt_get(opts, "clock"); @@ -3017,7 +3051,6 @@ int main(int argc, char **argv, char **envp) error_reportf_err(err, "cannot initialize crypto: "); exit(1); } - rtc_clock = QEMU_CLOCK_HOST; QLIST_INIT (&vm_change_state_head); os_setup_early_signal_handling(); @@ -3737,7 +3770,6 @@ int main(int argc, char **argv, char **envp) if (!opts) { exit(1); } - configure_rtc(opts); break; case QEMU_OPTION_tb_size: #ifndef CONFIG_TCG @@ -3955,6 +3987,8 @@ int main(int argc, char **argv, char **envp) exit(EXIT_FAILURE); } + configure_rtc(qemu_find_opts_singleton("rtc")); + machine_class = select_machine(); set_memory_options(&ram_slots, &maxram_size, machine_class);