From c3543fb5fe4520f03dd4fef04fab7745eeca1c96 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 7 Nov 2014 13:22:32 +0100 Subject: [PATCH 01/13] esp-pci: fixup deadlock with linux A linux guest will be issuing messages: [ 32.124042] DC390: Deadlock in DataIn_0: DMA aborted unfinished: 000000 bytes remain!! [ 32.126348] DC390: DataIn_0: DMA State: 0 and the HBA will fail to work properly. Reason is the emulation is not setting the 'DMA transfer done' status correctly. Signed-off-by: Hannes Reinecke Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi/esp-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/scsi/esp-pci.c b/hw/scsi/esp-pci.c index 82795e68b8..77b8647fca 100644 --- a/hw/scsi/esp-pci.c +++ b/hw/scsi/esp-pci.c @@ -268,6 +268,8 @@ static void esp_pci_dma_memory_rw(PCIESPState *pci, uint8_t *buf, int len, /* update status registers */ pci->dma_regs[DMA_WBC] -= len; pci->dma_regs[DMA_WAC] += len; + if (pci->dma_regs[DMA_WBC] == 0) + pci->dma_regs[DMA_STAT] |= DMA_STAT_DONE; } static void esp_pci_dma_memory_read(void *opaque, uint8_t *buf, int len) From 55783a5521a3b1f93ee6a072e414a27c6cfa15f0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 7 Nov 2014 14:00:02 +0100 Subject: [PATCH 02/13] virtio-scsi: work around bug in old BIOSes Old BIOSes left some padding by mistake after the req_size/resp_size. New QEMU does not like it, thinking it is a bidirectional command. As a workaround, we can check if the ANY_LAYOUT bit is set; if not, we always consider the first buffer as the virtio-scsi request/response, because, back when QEMU did not support ANY_LAYOUT, it expected the payload to start at the second element of the iovec. This can show up during migration. Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi/virtio-scsi.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index fdcacfd79a..ef485508b1 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -118,6 +118,7 @@ static size_t qemu_sgl_concat(VirtIOSCSIReq *req, struct iovec *iov, static int virtio_scsi_parse_req(VirtIOSCSIReq *req, unsigned req_size, unsigned resp_size) { + VirtIODevice *vdev = (VirtIODevice *) req->dev; size_t in_size, out_size; if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0, @@ -130,8 +131,24 @@ static int virtio_scsi_parse_req(VirtIOSCSIReq *req, resp_size) < resp_size) { return -EINVAL; } + req->resp_size = resp_size; + /* Old BIOSes left some padding by mistake after the req_size/resp_size. + * As a workaround, always consider the first buffer as the virtio-scsi + * request/response, making the payload start at the second element + * of the iovec. + * + * The actual length of the response header, stored in req->resp_size, + * does not change. + * + * TODO: always disable this workaround for virtio 1.0 devices. + */ + if ((vdev->guest_features & VIRTIO_F_ANY_LAYOUT) == 0) { + req_size = req->elem.out_sg[0].iov_len; + resp_size = req->elem.in_sg[0].iov_len; + } + out_size = qemu_sgl_concat(req, req->elem.out_sg, &req->elem.out_addr[0], req->elem.out_num, req_size); From 25aaa2c568a11bd79b7b83c857278232f6fa7be6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 10 Nov 2014 13:58:14 +0100 Subject: [PATCH 03/13] esp: fix coding standards Reported-by: Mark Cave-Ayland Signed-off-by: Paolo Bonzini --- hw/scsi/esp-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/scsi/esp-pci.c b/hw/scsi/esp-pci.c index 77b8647fca..00b7297354 100644 --- a/hw/scsi/esp-pci.c +++ b/hw/scsi/esp-pci.c @@ -268,8 +268,9 @@ static void esp_pci_dma_memory_rw(PCIESPState *pci, uint8_t *buf, int len, /* update status registers */ pci->dma_regs[DMA_WBC] -= len; pci->dma_regs[DMA_WAC] += len; - if (pci->dma_regs[DMA_WBC] == 0) + if (pci->dma_regs[DMA_WBC] == 0) { pci->dma_regs[DMA_STAT] |= DMA_STAT_DONE; + } } static void esp_pci_dma_memory_read(void *opaque, uint8_t *buf, int len) From ed4b43265d4d0c7ecfbbcb4001f61700756f22b9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 11 Nov 2014 09:17:09 +0800 Subject: [PATCH 04/13] virtio-scsi: dataplane: fix allocation for 'cmd_vrings' The size of each element should be sizeof(VirtIOSCSIVring *). Signed-off-by: Ming Lei Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- hw/scsi/virtio-scsi-dataplane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 9651e6f274..969b931557 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -230,7 +230,7 @@ void virtio_scsi_dataplane_start(VirtIOSCSI *s) if (!s->event_vring) { goto fail_vrings; } - s->cmd_vrings = g_malloc0(sizeof(VirtIOSCSIVring) * vs->conf.num_queues); + s->cmd_vrings = g_new(VirtIOSCSIVring *, vs->conf.num_queues); for (i = 0; i < vs->conf.num_queues; i++) { s->cmd_vrings[i] = virtio_scsi_vring_init(s, vs->cmd_vqs[i], From c9cf45c1a475e594c560862d9df35b16e3a42702 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 10 Nov 2014 16:52:55 +0100 Subject: [PATCH 05/13] esp: Do not overwrite ESP_TCHI after reset After a reset ESP_TCHI should contain the unique ID of the chip. This value will be overwritten with the current tranfer count if the transfer count has previously been set. So we should always return the chip id if ESP_TCHI has never been written to. Signed-off-by: Hannes Reinecke Signed-off-by: Paolo Bonzini --- hw/scsi/esp.c | 11 +++++++++-- include/hw/scsi/esp.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c index 5ab44d860b..272d13d633 100644 --- a/hw/scsi/esp.c +++ b/hw/scsi/esp.c @@ -364,7 +364,7 @@ void esp_hard_reset(ESPState *s) { memset(s->rregs, 0, ESP_REGS); memset(s->wregs, 0, ESP_REGS); - s->rregs[ESP_TCHI] = s->chip_id; + s->tchi_written = 0; s->ti_size = 0; s->ti_rptr = 0; s->ti_wptr = 0; @@ -422,6 +422,11 @@ uint64_t esp_reg_read(ESPState *s, uint32_t saddr) esp_lower_irq(s); return old_val; + case ESP_TCHI: + /* Return the unique id if the value has never been written */ + if (!s->tchi_written) { + return s->chip_id; + } default: break; } @@ -432,9 +437,11 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val) { trace_esp_mem_writeb(saddr, s->wregs[saddr], val); switch (saddr) { + case ESP_TCHI: + s->tchi_written = true; + /* fall through */ case ESP_TCLO: case ESP_TCMID: - case ESP_TCHI: s->rregs[ESP_RSTAT] &= ~STAT_TC; break; case ESP_FIFO: diff --git a/include/hw/scsi/esp.h b/include/hw/scsi/esp.h index e079fb8d16..6c795276c9 100644 --- a/include/hw/scsi/esp.h +++ b/include/hw/scsi/esp.h @@ -22,6 +22,7 @@ struct ESPState { uint8_t wregs[ESP_REGS]; qemu_irq irq; uint8_t chip_id; + bool tchi_written; int32_t ti_size; uint32_t ti_rptr, ti_wptr; uint32_t status; From 6012ca8159a7f78e6fc5122a8e9f22d82e9723e9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 12 Nov 2014 11:24:35 +0800 Subject: [PATCH 06/13] virtio-scsi: dataplane: suppress guest notification This patch uses vring_should_notify() to suppress guest notification, and looks notification frequency can be decreased from ~33K/sec to ~2K/sec in my test environment. Suggested-by: Stefan Hajnoczi Signed-off-by: Ming Lei Signed-off-by: Paolo Bonzini --- hw/scsi/virtio-scsi-dataplane.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 969b931557..03a1e8cfcf 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -92,9 +92,14 @@ VirtIOSCSIReq *virtio_scsi_pop_req_vring(VirtIOSCSI *s, void virtio_scsi_vring_push_notify(VirtIOSCSIReq *req) { + VirtIODevice *vdev = VIRTIO_DEVICE(req->vring->parent); + vring_push(&req->vring->vring, &req->elem, req->qsgl.size + req->resp_iov.size); - event_notifier_set(&req->vring->guest_notifier); + + if (vring_should_notify(vdev, &req->vring->vring)) { + event_notifier_set(&req->vring->guest_notifier); + } } static void virtio_scsi_iothread_handle_ctrl(EventNotifier *notifier) From f69c11158509e91c29f01e48ace8af227827411a Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 12 Nov 2014 19:29:55 +0800 Subject: [PATCH 07/13] virtio-scsi: Fix comment for VirtIOSCSIReq The cdb is not zeroed by virtio_scsi_init_req, so fix the misleading comment. Suggested-by: Markus Armbruster Signed-off-by: Fam Zheng Signed-off-by: Paolo Bonzini --- include/hw/virtio/virtio-scsi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 9e1a49c2c1..bf17cc9ea5 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -209,7 +209,8 @@ typedef struct VirtIOSCSIReq { /* Note: * - fields before elem are initialized by virtio_scsi_init_req; * - elem is uninitialized at the time of allocation. - * - fields after elem are zeroed by virtio_scsi_init_req. + * - fields after elem (except the ending cdb[]) are zeroed by + * virtio_scsi_init_req. * */ VirtQueueElement elem; From ae67dc72e4f19238941894227d96b6201d71a70a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 12 Nov 2014 12:04:56 +0100 Subject: [PATCH 08/13] target-i386: eliminate dead code and hoist common code out of "if" ist != 0 is checked in the first "if", so it cannot be true in the "else if" part. While at it, simplify the code and move the ESP alignment out of the conditionals. Reported by Coverity. Signed-off-by: Paolo Bonzini --- target-i386/seg_helper.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index af5c1c6830..c98eeb4351 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -883,32 +883,23 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int, } if ((!(e2 & DESC_C_MASK) && dpl < cpl) || ist != 0) { /* to inner privilege */ - if (ist != 0) { - esp = get_rsp_from_tss(env, ist + 3); - } else { - esp = get_rsp_from_tss(env, dpl); - } - esp &= ~0xfLL; /* align stack */ - ss = 0; new_stack = 1; + esp = get_rsp_from_tss(env, ist != 0 ? ist + 3 : dpl); + ss = 0; } else if ((e2 & DESC_C_MASK) || dpl == cpl) { /* to same privilege */ if (env->eflags & VM_MASK) { raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc); } new_stack = 0; - if (ist != 0) { - esp = get_rsp_from_tss(env, ist + 3); - } else { - esp = env->regs[R_ESP]; - } - esp &= ~0xfLL; /* align stack */ + esp = env->regs[R_ESP]; dpl = cpl; } else { raise_exception_err(env, EXCP0D_GPF, selector & 0xfffc); new_stack = 0; /* avoid warning */ esp = 0; /* avoid warning */ } + esp &= ~0xfLL; /* align stack */ PUSHQ(esp, env->segs[R_SS].selector); PUSHQ(esp, env->regs[R_ESP]); From c2c00148ec54f77c9432fec16585834e1d677fda Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Thu, 28 Aug 2014 15:18:57 +0400 Subject: [PATCH 09/13] apic_common: migrate missing fields This patch adds missed sipi_vector and wait_for_sipi fields to a new subsection of the vmstate of the apic_common module. Saving and loading of these fields makes migration of the apic state deterministic. Signed-off-by: Pavel Dovgalyuk [Initialize the field in pre_load and kvm_apic_realize. - Paolo] Signed-off-by: Paolo Bonzini --- hw/i386/kvm/apic.c | 3 +++ hw/intc/apic_common.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index e873b509a5..271e97f86f 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -175,6 +175,9 @@ static void kvm_apic_realize(DeviceState *dev, Error **errp) { APICCommonState *s = APIC_COMMON(dev); + /* Not used by KVM, which uses the CPU mp_state instead. */ + s->wait_for_sipi = 0; + memory_region_init_io(&s->io_memory, NULL, &kvm_apic_io_ops, s, "kvm-apic-msi", APIC_SPACE_SIZE); diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index ce3d903b13..4e62f25edb 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -324,6 +324,19 @@ static void apic_common_realize(DeviceState *dev, Error **errp) } +static int apic_pre_load(void *opaque) +{ + APICCommonState *s = APIC_COMMON(opaque); + + /* The default is !cpu_is_bsp(s->cpu), but the common value is 0 + * so that's what apic_common_sipi_needed checks for. Reset to + * the value that is assumed when the apic_sipi subsection is + * absent. + */ + s->wait_for_sipi = 0; + return 0; +} + static void apic_dispatch_pre_save(void *opaque) { APICCommonState *s = APIC_COMMON(opaque); @@ -345,12 +358,30 @@ static int apic_dispatch_post_load(void *opaque, int version_id) return 0; } +static bool apic_common_sipi_needed(void *opaque) +{ + APICCommonState *s = APIC_COMMON(opaque); + return s->wait_for_sipi != 0; +} + +static const VMStateDescription vmstate_apic_common_sipi = { + .name = "apic_sipi", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_INT32(sipi_vector, APICCommonState), + VMSTATE_INT32(wait_for_sipi, APICCommonState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_apic_common = { .name = "apic", .version_id = 3, .minimum_version_id = 3, .minimum_version_id_old = 1, .load_state_old = apic_load_old, + .pre_load = apic_pre_load, .pre_save = apic_dispatch_pre_save, .post_load = apic_dispatch_post_load, .fields = (VMStateField[]) { @@ -375,6 +406,13 @@ static const VMStateDescription vmstate_apic_common = { VMSTATE_INT64(timer_expiry, APICCommonState), /* open-coded timer state */ VMSTATE_END_OF_LIST() + }, + .subsections = (VMStateSubsection[]) { + { + .vmsd = &vmstate_apic_common_sipi, + .needed = apic_common_sipi_needed, + }, + VMSTATE_END_OF_LIST() } }; From e6a33e45c270ea024929f4afb49283d610577af3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 12 Nov 2014 12:16:58 +0100 Subject: [PATCH 10/13] target-i386: fix Coverity complaints about overflows sipi_vector is an int; it is shifted by 12 and passed as a 64-bit value, which makes Coverity think that we wanted (uint64_t)sipi_vector << 12. But actually it must be between 0 and 255. Make this explicit. Signed-off-by: Paolo Bonzini --- target-i386/cpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 1b2c12ad94..015f5b5276 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -1104,7 +1104,7 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, } static inline void cpu_x86_load_seg_cache_sipi(X86CPU *cpu, - int sipi_vector) + uint8_t sipi_vector) { CPUState *cs = CPU(cpu); CPUX86State *env = &cpu->env; From 1154d84dcc5f46e83db94281d071775819dd8884 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 3 Nov 2014 15:45:34 -0200 Subject: [PATCH 11/13] kvmclock: Add comment explaining why we need cpu_clean_all_dirty() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Try to explain why commit 317b0a6d8ba44e9bf8f9c3dbd776c4536843d82c needed a cpu_clean_all_dirty() call just after calling cpu_synchronize_all_states(). Signed-off-by: Eduardo Habkost Cc: Andrey Korolyov Cc: Marcin Gibuła Cc: Marcelo Tosatti Cc: Paolo Bonzini Signed-off-by: Paolo Bonzini --- hw/i386/kvm/clock.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 1ac60d6cdd..58be2bda27 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -127,7 +127,21 @@ static void kvmclock_vm_state_change(void *opaque, int running, } cpu_synchronize_all_states(); + /* In theory, the cpu_synchronize_all_states() call above wouldn't + * affect the rest of the code, as the VCPU state inside CPUState + * is supposed to always match the VCPU state on the kernel side. + * + * In practice, calling cpu_synchronize_state() too soon will load the + * kernel-side APIC state into X86CPU.apic_state too early, APIC state + * won't be reloaded later because CPUState.vcpu_dirty==true, and + * outdated APIC state may be migrated to another host. + * + * The real fix would be to make sure outdated APIC state is read + * from the kernel again when necessary. While this is not fixed, we + * need the cpu_clean_all_dirty() call below. + */ cpu_clean_all_dirty(); + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); if (ret < 0) { fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); From f4ec5cd29d1f0d3a265039767399d2cf3e75950b Mon Sep 17 00:00:00 2001 From: SeokYeon Hwang Date: Wed, 5 Nov 2014 15:19:54 +0900 Subject: [PATCH 12/13] smbios: change 'ram_addr_t' variables to 'uint64_t' ram_addr_t should not be used except if referring to a RAMBlobk. Using 'uint64_t' avoids a -Wconstant-conversion warning, which clang >= 3.4 produces in "smbios_get_tables()". Signed-off-by: SeokYeon Hwang Signed-off-by: Paolo Bonzini --- hw/i386/smbios.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/i386/smbios.c b/hw/i386/smbios.c index 8a7ad48921..024e59445b 100644 --- a/hw/i386/smbios.c +++ b/hw/i386/smbios.c @@ -645,7 +645,7 @@ static void smbios_build_type_4_table(unsigned instance) static void smbios_build_type_16_table(unsigned dimm_cnt) { - ram_addr_t size_kb; + uint64_t size_kb; SMBIOS_BUILD_TABLE_PRE(16, 0x1000, true); /* required */ @@ -669,10 +669,10 @@ static void smbios_build_type_16_table(unsigned dimm_cnt) #define MAX_T17_STD_SZ 0x7FFF /* (32G - 1M), in Megabytes */ #define MAX_T17_EXT_SZ 0x80000000 /* 2P, in Megabytes */ -static void smbios_build_type_17_table(unsigned instance, ram_addr_t size) +static void smbios_build_type_17_table(unsigned instance, uint64_t size) { char loc_str[128]; - ram_addr_t size_mb; + uint64_t size_mb; SMBIOS_BUILD_TABLE_PRE(17, 0x1100 + instance, true); /* required */ @@ -711,9 +711,9 @@ static void smbios_build_type_17_table(unsigned instance, ram_addr_t size) } static void smbios_build_type_19_table(unsigned instance, - ram_addr_t start, ram_addr_t size) + uint64_t start, uint64_t size) { - ram_addr_t end, start_kb, end_kb; + uint64_t end, start_kb, end_kb; SMBIOS_BUILD_TABLE_PRE(19, 0x1300 + instance, true); /* required */ From 3ef0eab178e5120a0e1c079d163d5c71689d9b71 Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Fri, 7 Nov 2014 13:31:33 +0300 Subject: [PATCH 13/13] acpi: accurate overflow check Compare clock in ns, because acpi_pm_tmr_update uses rounded to ns value instead of ticks. Signed-off-by: Pavel Dovgalyuk [This lets Windows boot in icount mode. - Paolo] Signed-off-by: Paolo Bonzini --- hw/acpi/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/acpi/core.c b/hw/acpi/core.c index a7368fb242..51913d6932 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -376,8 +376,11 @@ static void acpi_notify_wakeup(Notifier *notifier, void *data) /* ACPI PM1a EVT */ uint16_t acpi_pm1_evt_get_sts(ACPIREGS *ar) { - int64_t d = acpi_pm_tmr_get_clock(); - if (d >= ar->tmr.overflow_time) { + /* Compare ns-clock, not PM timer ticks, because + acpi_pm_tmr_update function uses ns for setting the timer. */ + int64_t d = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (d >= muldiv64(ar->tmr.overflow_time, + get_ticks_per_sec(), PM_TIMER_FREQUENCY)) { ar->pm1.evt.sts |= ACPI_BITMASK_TIMER_STATUS; } return ar->pm1.evt.sts;