/*
 * Hyper-V guest/hypervisor interaction
 *
 * Copyright (c) 2015-2018 Virtuozzo International GmbH.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qapi/error.h"
#include "exec/address-spaces.h"
#include "sysemu/kvm.h"
#include "qemu/bitops.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/queue.h"
#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "hw/hyperv/hyperv.h"

/*
 * Per-vcpu SynIC device.
 *
 * For each of the two pages (message page, event flags page) the state holds
 * the address the page is currently mapped at in system memory (0 when it is
 * not mapped), the RAM region backing it, and a host pointer into that RAM.
 */
typedef struct SynICState {
    DeviceState parent_obj;

    /* vcpu this SynIC belongs to; set by hyperv_synic_add */
    CPUState *cs;

    bool enabled;
    hwaddr msg_page_addr;
    hwaddr event_page_addr;
    MemoryRegion msg_page_mr;
    MemoryRegion event_page_mr;
    struct hyperv_message_page *msg_page;
    struct hyperv_event_flags_page *event_page;
} SynICState;

#define TYPE_SYNIC "hyperv-synic"
#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)

/* Resolve the "synic" child of the vcpu object @cs. */
static SynICState *get_synic(CPUState *cs)
{
    Object *child = object_resolve_path_component(OBJECT(cs), "synic");

    return SYNIC(child);
}

/*
 * Move one SynIC page to @new_addr.
 *
 * @page_mr is the region backing the page and @cur_addr points at the address
 * it is currently recorded at.  When the address changes, the region is
 * removed from system memory if the old address was non-zero and added at the
 * new address if that one is non-zero; the recorded address is then updated.
 * Nothing is done when the address is unchanged.
 */
static void synic_remap_page(MemoryRegion *page_mr, hwaddr *cur_addr,
                             hwaddr new_addr)
{
    if (*cur_addr == new_addr) {
        return;
    }

    if (*cur_addr) {
        memory_region_del_subregion(get_system_memory(), page_mr);
    }
    if (new_addr) {
        memory_region_add_subregion(get_system_memory(), new_addr, page_mr);
    }
    *cur_addr = new_addr;
}

/*
 * Record the new enable state of @synic and bring the mappings of the message
 * and event flags pages in line with the requested addresses.
 */
static void synic_update(SynICState *synic, bool enable,
                         hwaddr msg_page_addr, hwaddr event_page_addr)
{
    synic->enabled = enable;

    synic_remap_page(&synic->msg_page_mr, &synic->msg_page_addr,
                     msg_page_addr);
    synic_remap_page(&synic->event_page_mr, &synic->event_page_addr,
                     event_page_addr);
}

/*
 * Public entry point for synic_update: looks up the SynIC of @cs and applies
 * the new state to it.  Does nothing if @cs has no SynIC.
 */
void hyperv_synic_update(CPUState *cs, bool enable,
                         hwaddr msg_page_addr, hwaddr event_page_addr)
{
    SynICState *synic = get_synic(cs);

    if (synic) {
        synic_update(synic, enable, msg_page_addr, event_page_addr);
    }
}

/*
 * Realize handler: allocate the RAM regions backing the message and event
 * flags pages and cache host pointers to them.  Allocation failures abort
 * (&error_abort); @errp is not used.
 */
static void synic_realize(DeviceState *dev, Error **errp)
{
    SynICState *synic = SYNIC(dev);
    Object *owner = OBJECT(dev);
    /* memory region names have to be globally unique */
    uint32_t vp_index = hyperv_vp_index(synic->cs);
    char *msg_name = g_strdup_printf("synic-%u-msg-page", vp_index);
    char *event_name = g_strdup_printf("synic-%u-event-page", vp_index);

    memory_region_init_ram(&synic->msg_page_mr, owner, msg_name,
                           sizeof(*synic->msg_page), &error_abort);
    memory_region_init_ram(&synic->event_page_mr, owner, event_name,
                           sizeof(*synic->event_page), &error_abort);
    synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
    synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);

    g_free(msg_name);
    g_free(event_name);
}

/*
 * Reset handler: zero the contents of both pages, then disable the SynIC and
 * drop both page mappings (address 0).
 */
static void synic_reset(DeviceState *dev)
{
    SynICState *synic = SYNIC(dev);

    memset(synic->msg_page, 0, sizeof(*synic->msg_page));
    memset(synic->event_page, 0, sizeof(*synic->event_page));

    synic_update(synic, false, 0, 0);
}

/* Install the realize/reset handlers and mark the device non user-creatable. */
static void synic_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->user_creatable = false;
    dc->realize = synic_realize;
    dc->reset = synic_reset;
}

/*
 * Create a SynIC device for @cs, attach it to the vcpu object as its "synic"
 * child and realize it.  The reference obtained from object_new() is dropped
 * once the object has been added as a child.  Any failure aborts.
 */
void hyperv_synic_add(CPUState *cs)
{
    Object *obj = object_new(TYPE_SYNIC);
    SynICState *synic = SYNIC(obj);

    synic->cs = cs;

    object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
    object_unref(obj);
    object_property_set_bool(obj, true, "realized", &error_abort);
}

/* Reset the SynIC device of @cs; a vcpu without a SynIC is left alone. */
void hyperv_synic_reset(CPUState *cs)
{
    SynICState *synic = get_synic(cs);

    if (!synic) {
        return;
    }
    device_legacy_reset(DEVICE(synic));
}

static const TypeInfo synic_type_info = {
    .name = TYPE_SYNIC,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(SynICState),
    .class_init = synic_class_init,
};

static void synic_register_types(void)
{
    type_register_static(&synic_type_info);
}

type_init(synic_register_types)

/*
 * KVM has its own message producers (SynIC timers).
To guarantee * serialization with both KVM vcpu and the guest cpu, the messages are first * staged in an intermediate area and then posted to the SynIC message page in * the vcpu thread. */ typedef struct HvSintStagedMessage { /* message content staged by hyperv_post_msg */ struct hyperv_message msg; /* callback + data (r/o) to complete the processing in a BH */ HvSintMsgCb cb; void *cb_data; /* message posting status filled by cpu_post_msg */ int status; /* passing the buck: */ enum { /* initial state */ HV_STAGED_MSG_FREE, /* * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE -> * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu */ HV_STAGED_MSG_BUSY, /* * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot, * notify the guest, records the status, marks the posting done (BUSY * -> POSTED), and schedules sint_msg_bh BH */ HV_STAGED_MSG_POSTED, /* * sint_msg_bh (BH) verifies that the posting is done, runs the * callback, and starts over (POSTED -> FREE) */ } state; } HvSintStagedMessage; struct HvSintRoute { uint32_t sint; SynICState *synic; int gsi; EventNotifier sint_set_notifier; EventNotifier sint_ack_notifier; HvSintStagedMessage *staged_msg; unsigned refcount; }; static CPUState *hyperv_find_vcpu(uint32_t vp_index) { CPUState *cs = qemu_get_cpu(vp_index); assert(hyperv_vp_index(cs) == vp_index); return cs; } /* * BH to complete the processing of a staged message. 
*/
static void sint_msg_bh(void *opaque)
{
    HvSintRoute *route = opaque;
    HvSintStagedMessage *staged = route->staged_msg;
    int status;

    if (atomic_read(&staged->state) != HV_STAGED_MSG_POSTED) {
        /* status not ready yet (spurious ack from guest?), ignore */
        return;
    }

    /* hand the recorded posting status to the originator, then clear it */
    status = staged->status;
    staged->cb(staged->cb_data, status);
    staged->status = 0;

    /* staged message processing finished, ready to start over */
    atomic_set(&staged->state, HV_STAGED_MSG_FREE);
    /* drop the reference taken in hyperv_post_msg */
    hyperv_sint_route_unref(route);
}

/*
 * Worker to transfer the message from the staging area into the SynIC message
 * page in vcpu context.
 *
 * The posting status recorded in the staging area is:
 *   -ENXIO   if the SynIC is disabled or the message page has no address;
 *   -EAGAIN  if the slot still holds a message; the pending flag is set on
 *            that message and completion is left to the SINT ack notifier;
 *   otherwise the result of hyperv_sint_route_set_sint().
 */
static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
{
    HvSintRoute *route = data.host_ptr;
    HvSintStagedMessage *staged = route->staged_msg;
    SynICState *synic = route->synic;
    bool defer_to_sint_ack = false;

    assert(staged->state == HV_STAGED_MSG_BUSY);

    if (synic->enabled && synic->msg_page_addr) {
        struct hyperv_message *slot = &synic->msg_page->slot[route->sint];

        if (slot->header.message_type == HV_MESSAGE_NONE) {
            /* slot is free: deliver the staged message and signal the SINT */
            memcpy(slot, &staged->msg, sizeof(*slot));
            staged->status = hyperv_sint_route_set_sint(route);
        } else {
            /* slot is occupied: flag it so the guest reports consuming it */
            slot->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
            staged->status = -EAGAIN;
            defer_to_sint_ack = true;
        }

        memory_region_set_dirty(&synic->msg_page_mr, 0,
                                sizeof(*synic->msg_page));
    } else {
        staged->status = -ENXIO;
    }

    atomic_set(&staged->state, HV_STAGED_MSG_POSTED);

    /*
     * Notify the msg originator of the progress made; if the slot was busy we
     * set msg_pending flag in it so it will be the guest who will do EOM and
     * trigger the notification from KVM via sint_ack_notifier
     */
    if (defer_to_sint_ack) {
        return;
    }
    aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, route);
}

/*
 * Post a Hyper-V message to the staging area, for delivery to guest in the
 * vcpu thread.
*/
int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
{
    HvSintStagedMessage *staged = sint_route->staged_msg;
    int prev_state;

    assert(staged);

    /* grab the staging area; it is ours only if it was FREE */
    prev_state = atomic_cmpxchg(&staged->state, HV_STAGED_MSG_FREE,
                                HV_STAGED_MSG_BUSY);
    if (prev_state != HV_STAGED_MSG_FREE) {
        return -EAGAIN;
    }

    memcpy(&staged->msg, src_msg, sizeof(*src_msg));

    /* hold a reference on sint_route until the callback is finished */
    hyperv_sint_route_ref(sint_route);

    /* schedule message posting attempt in vcpu thread */
    async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
                     RUN_ON_CPU_HOST_PTR(sint_route));
    return 0;
}

/*
 * Handler of the SINT ack notifier: clears the notifier and schedules
 * sint_msg_bh for the route the notifier is embedded in.
 */
static void sint_ack_handler(EventNotifier *notifier)
{
    HvSintRoute *route = container_of(notifier, HvSintRoute,
                                      sint_ack_notifier);

    event_notifier_test_and_clear(notifier);

    /*
     * the guest consumed the previous message so complete the current one with
     * -EAGAIN and let the msg originator retry
     */
    aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, route);
}

/*
 * Set given event flag for a given sint on a given vcpu, and signal the sint.
*/ int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno) { int ret; SynICState *synic = sint_route->synic; unsigned long *flags, set_mask; unsigned set_idx; if (eventno > HV_EVENT_FLAGS_COUNT) { return -EINVAL; } if (!synic->enabled || !synic->event_page_addr) { return -ENXIO; } set_idx = BIT_WORD(eventno); set_mask = BIT_MASK(eventno); flags = synic->event_page->slot[sint_route->sint].flags; if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) { memory_region_set_dirty(&synic->event_page_mr, 0, sizeof(*synic->event_page)); ret = hyperv_sint_route_set_sint(sint_route); } else { ret = 0; } return ret; } HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint, HvSintMsgCb cb, void *cb_data) { HvSintRoute *sint_route; EventNotifier *ack_notifier; int r, gsi; CPUState *cs; SynICState *synic; cs = hyperv_find_vcpu(vp_index); if (!cs) { return NULL; } synic = get_synic(cs); if (!synic) { return NULL; } sint_route = g_new0(HvSintRoute, 1); r = event_notifier_init(&sint_route->sint_set_notifier, false); if (r) { goto err; } ack_notifier = cb ? 
&sint_route->sint_ack_notifier : NULL; if (ack_notifier) { sint_route->staged_msg = g_new0(HvSintStagedMessage, 1); sint_route->staged_msg->cb = cb; sint_route->staged_msg->cb_data = cb_data; r = event_notifier_init(ack_notifier, false); if (r) { goto err_sint_set_notifier; } event_notifier_set_handler(ack_notifier, sint_ack_handler); } gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); if (gsi < 0) { goto err_gsi; } r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &sint_route->sint_set_notifier, ack_notifier, gsi); if (r) { goto err_irqfd; } sint_route->gsi = gsi; sint_route->synic = synic; sint_route->sint = sint; sint_route->refcount = 1; return sint_route; err_irqfd: kvm_irqchip_release_virq(kvm_state, gsi); err_gsi: if (ack_notifier) { event_notifier_set_handler(ack_notifier, NULL); event_notifier_cleanup(ack_notifier); g_free(sint_route->staged_msg); } err_sint_set_notifier: event_notifier_cleanup(&sint_route->sint_set_notifier); err: g_free(sint_route); return NULL; } void hyperv_sint_route_ref(HvSintRoute *sint_route) { sint_route->refcount++; } void hyperv_sint_route_unref(HvSintRoute *sint_route) { if (!sint_route) { return; } assert(sint_route->refcount > 0); if (--sint_route->refcount) { return; } kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &sint_route->sint_set_notifier, sint_route->gsi); kvm_irqchip_release_virq(kvm_state, sint_route->gsi); if (sint_route->staged_msg) { event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); event_notifier_cleanup(&sint_route->sint_ack_notifier); g_free(sint_route->staged_msg); } event_notifier_cleanup(&sint_route->sint_set_notifier); g_free(sint_route); } int hyperv_sint_route_set_sint(HvSintRoute *sint_route) { return event_notifier_set(&sint_route->sint_set_notifier); } typedef struct MsgHandler { struct rcu_head rcu; QLIST_ENTRY(MsgHandler) link; uint32_t conn_id; HvMsgHandler handler; void *data; } MsgHandler; typedef struct EventFlagHandler { struct rcu_head rcu; 
QLIST_ENTRY(EventFlagHandler) link; uint32_t conn_id; EventNotifier *notifier; } EventFlagHandler; static QLIST_HEAD(, MsgHandler) msg_handlers; static QLIST_HEAD(, EventFlagHandler) event_flag_handlers; static QemuMutex handlers_mutex; static void __attribute__((constructor)) hv_init(void) { QLIST_INIT(&msg_handlers); QLIST_INIT(&event_flag_handlers); qemu_mutex_init(&handlers_mutex); } int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data) { int ret; MsgHandler *mh; QEMU_LOCK_GUARD(&handlers_mutex); QLIST_FOREACH(mh, &msg_handlers, link) { if (mh->conn_id == conn_id) { if (handler) { ret = -EEXIST; } else { QLIST_REMOVE_RCU(mh, link); g_free_rcu(mh, rcu); ret = 0; } return ret; } } if (handler) { mh = g_new(MsgHandler, 1); mh->conn_id = conn_id; mh->handler = handler; mh->data = data; QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link); ret = 0; } else { ret = -ENOENT; } return ret; } uint16_t hyperv_hcall_post_message(uint64_t param, bool fast) { uint16_t ret; hwaddr len; struct hyperv_post_message_input *msg; MsgHandler *mh; if (fast) { return HV_STATUS_INVALID_HYPERCALL_CODE; } if (param & (__alignof__(*msg) - 1)) { return HV_STATUS_INVALID_ALIGNMENT; } len = sizeof(*msg); msg = cpu_physical_memory_map(param, &len, 0); if (len < sizeof(*msg)) { ret = HV_STATUS_INSUFFICIENT_MEMORY; goto unmap; } if (msg->payload_size > sizeof(msg->payload)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; goto unmap; } ret = HV_STATUS_INVALID_CONNECTION_ID; WITH_RCU_READ_LOCK_GUARD() { QLIST_FOREACH_RCU(mh, &msg_handlers, link) { if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) { ret = mh->handler(msg, mh->data); break; } } } unmap: cpu_physical_memory_unmap(msg, len, 0, 0); return ret; } static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) { int ret; EventFlagHandler *handler; QEMU_LOCK_GUARD(&handlers_mutex); QLIST_FOREACH(handler, &event_flag_handlers, link) { if (handler->conn_id == conn_id) { if (notifier) { ret = 
-EEXIST; } else { QLIST_REMOVE_RCU(handler, link); g_free_rcu(handler, rcu); ret = 0; } return ret; } } if (notifier) { handler = g_new(EventFlagHandler, 1); handler->conn_id = conn_id; handler->notifier = notifier; QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link); ret = 0; } else { ret = -ENOENT; } return ret; } static bool process_event_flags_userspace; int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier) { if (!process_event_flags_userspace && !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) { process_event_flags_userspace = true; warn_report("Hyper-V event signaling is not supported by this kernel; " "using slower userspace hypercall processing"); } if (!process_event_flags_userspace) { struct kvm_hyperv_eventfd hvevfd = { .conn_id = conn_id, .fd = notifier ? event_notifier_get_fd(notifier) : -1, .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN, }; return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd); } return set_event_flag_handler(conn_id, notifier); } uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast) { EventFlagHandler *handler; if (unlikely(!fast)) { hwaddr addr = param; if (addr & (__alignof__(addr) - 1)) { return HV_STATUS_INVALID_ALIGNMENT; } param = ldq_phys(&address_space_memory, addr); } /* * Per spec, bits 32-47 contain the extra "flag number". However, we * have no use for it, and in all known usecases it is zero, so just * report lookup failure if it isn't. */ if (param & 0xffff00000000ULL) { return HV_STATUS_INVALID_PORT_ID; } /* remaining bits are reserved-zero */ if (param & ~HV_CONNECTION_ID_MASK) { return HV_STATUS_INVALID_HYPERCALL_INPUT; } RCU_READ_LOCK_GUARD(); QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) { if (handler->conn_id == param) { event_notifier_set(handler->notifier); return 0; } } return HV_STATUS_INVALID_CONNECTION_ID; }