From 35fc1f71899fd42323bd8f33da18f0211e0d2727 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:50:26 +0300 Subject: [PATCH 01/36] vmstate: reduce code duplication move size offset and number of elements math out to functions, to reduce code duplication. Signed-off-by: Michael S. Tsirkin Cc: "Dr. David Alan Gilbert" Signed-off-by: Juan Quintela --- vmstate.c | 96 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/vmstate.c b/vmstate.c index b689f2f9b3..dd6f834331 100644 --- a/vmstate.c +++ b/vmstate.c @@ -10,6 +10,50 @@ static void vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd, void *opaque); +static int vmstate_n_elems(void *opaque, VMStateField *field) +{ + int n_elems = 1; + + if (field->flags & VMS_ARRAY) { + n_elems = field->num; + } else if (field->flags & VMS_VARRAY_INT32) { + n_elems = *(int32_t *)(opaque+field->num_offset); + } else if (field->flags & VMS_VARRAY_UINT32) { + n_elems = *(uint32_t *)(opaque+field->num_offset); + } else if (field->flags & VMS_VARRAY_UINT16) { + n_elems = *(uint16_t *)(opaque+field->num_offset); + } else if (field->flags & VMS_VARRAY_UINT8) { + n_elems = *(uint8_t *)(opaque+field->num_offset); + } + + return n_elems; +} + +static int vmstate_size(void *opaque, VMStateField *field) +{ + int size = field->size; + + if (field->flags & VMS_VBUFFER) { + size = *(int32_t *)(opaque+field->size_offset); + if (field->flags & VMS_MULTIPLY) { + size *= field->size; + } + } + + return size; +} + +static void *vmstate_base_addr(void *opaque, VMStateField *field) +{ + void *base_addr = opaque + field->offset; + + if (field->flags & VMS_POINTER) { + base_addr = *(void **)base_addr + field->start; + } + + return base_addr; +} + int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, int version_id) { @@ -36,30 +80,10 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, field->field_exists(opaque, version_id)) || (!field->field_exists && field->version_id <= version_id)) { - void *base_addr = opaque + field->offset; - int i, n_elems = 1; - int size = field->size; + void *base_addr = vmstate_base_addr(opaque, field); + int i, n_elems = vmstate_n_elems(opaque, field); + int size = vmstate_size(opaque, field); - if (field->flags & VMS_VBUFFER) { - size = *(int32_t *)(opaque+field->size_offset); - if (field->flags & VMS_MULTIPLY) { - size *= field->size; - } - } - if (field->flags & VMS_ARRAY) { - n_elems = field->num; - } else if (field->flags & VMS_VARRAY_INT32) { - n_elems = *(int32_t *)(opaque+field->num_offset); - } else if (field->flags & VMS_VARRAY_UINT32) { - n_elems = *(uint32_t *)(opaque+field->num_offset); - } else if (field->flags & VMS_VARRAY_UINT16) { - n_elems = *(uint16_t *)(opaque+field->num_offset); - } else if (field->flags & VMS_VARRAY_UINT8) { - n_elems = *(uint8_t *)(opaque+field->num_offset); - } - if (field->flags & VMS_POINTER) { - base_addr = *(void **)base_addr + field->start; - } for (i = 0; i < n_elems; i++) { void *addr = base_addr + size * i; @@ -102,30 +126,10 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, while (field->name) { if (!field->field_exists || field->field_exists(opaque, vmsd->version_id)) { - void *base_addr = opaque + field->offset; - int i, n_elems = 1; - int size = field->size; + void *base_addr = vmstate_base_addr(opaque, field); + int i, n_elems = vmstate_n_elems(opaque, field); + int size = vmstate_size(opaque, field); - if (field->flags & VMS_VBUFFER) { - size = *(int32_t *)(opaque+field->size_offset); - if (field->flags & VMS_MULTIPLY) { - size *= field->size; - } - } - if (field->flags & VMS_ARRAY) { - n_elems = field->num; - } else if (field->flags & VMS_VARRAY_INT32) { - n_elems = *(int32_t *)(opaque+field->num_offset); - } else if (field->flags & VMS_VARRAY_UINT32) { - n_elems = *(uint32_t *)(opaque+field->num_offset); - } else if (field->flags & VMS_VARRAY_UINT16) { - n_elems = *(uint16_t *)(opaque+field->num_offset); - } else if (field->flags & VMS_VARRAY_UINT8) { - n_elems = *(uint8_t *)(opaque+field->num_offset); - } - if (field->flags & VMS_POINTER) { - base_addr = *(void **)base_addr + field->start; - } for (i = 0; i < n_elems; i++) { void *addr = base_addr + size * i; From 5bf81c8d63db0216a4d29dc87f9ce530bb791dd1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:50:31 +0300 Subject: [PATCH 02/36] vmstate: add VMS_MUST_EXIST Can be used to verify a required field exists or validate state in some other way. Signed-off-by: Michael S. Tsirkin Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela --- include/migration/vmstate.h | 1 + vmstate.c | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index e7e170561d..de970abedd 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -100,6 +100,7 @@ enum VMStateFlags { VMS_MULTIPLY = 0x200, /* multiply "size" field by field_size */ VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field*/ VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field*/ + VMS_MUST_EXIST = 0x1000, /* Field must exist in input */ }; typedef struct { diff --git a/vmstate.c b/vmstate.c index dd6f834331..f019228188 100644 --- a/vmstate.c +++ b/vmstate.c @@ -102,6 +102,10 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, return ret; } } + } else if (field->flags & VMS_MUST_EXIST) { + fprintf(stderr, "Input validation failed: %s/%s\n", + vmsd->name, field->name); + return -1; } field++; } @@ -142,6 +146,12 @@ void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd, field->info->put(f, addr, size); } } + } else { + if (field->flags & VMS_MUST_EXIST) { + fprintf(stderr, "Output state validation failed: %s/%s\n", + vmsd->name, field->name); + assert(!(field->flags & VMS_MUST_EXIST)); + } } field++; } From 4082f0889ba04678fc14816c53e1b9251ea9207e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:50:35 +0300 Subject: [PATCH 03/36] vmstate: add VMSTATE_VALIDATE Validate state using VMS_ARRAY with num = 0 and VMS_MUST_EXIST Signed-off-by: Michael S. Tsirkin Signed-off-by: Juan Quintela --- include/migration/vmstate.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index de970abedd..5b7137058d 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -204,6 +204,14 @@ extern const VMStateInfo vmstate_info_bitmap; .offset = vmstate_offset_value(_state, _field, _type), \ } +/* Validate state using a boolean predicate. */ +#define VMSTATE_VALIDATE(_name, _test) { \ + .name = (_name), \ + .field_exists = (_test), \ + .flags = VMS_ARRAY | VMS_MUST_EXIST, \ + .num = 0, /* 0 elements: no data, only run _test */ \ +} + #define VMSTATE_POINTER(_field, _state, _version, _info, _type) { \ .name = (stringify(_field)), \ .version_id = (_version), \ From 71f7fe48e10a8437c9d42d859389f37157f59980 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:50:39 +0300 Subject: [PATCH 04/36] virtio-net: fix buffer overflow on invalid state load CVE-2013-4148 QEMU 1.0 integer conversion in virtio_net_load()@hw/net/virtio-net.c Deals with loading a corrupted savevm image. > n->mac_table.in_use = qemu_get_be32(f); in_use is int so it can get negative when assigned 32bit unsigned value. > /* MAC_TABLE_ENTRIES may be different from the saved image */ > if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) { passing this check ^^^ > qemu_get_buffer(f, n->mac_table.macs, > n->mac_table.in_use * ETH_ALEN); with good in_use value, "n->mac_table.in_use * ETH_ALEN" can get positive and bigger than mac_table.macs. For example 0x81000000 satisfies this condition when ETH_ALEN is 6. Fix it by making the value unsigned. For consistency, change first_multi as well. Note: all call sites were audited to confirm that making them unsigned didn't cause any issues: it turns out we actually never do math on them, so it's easy to validate because both values are always <= MAC_TABLE_ENTRIES. Reviewed-by: Michael Roth Signed-off-by: Michael S. Tsirkin Reviewed-by: Laszlo Ersek Signed-off-by: Juan Quintela --- include/hw/virtio/virtio-net.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index df60f16a3e..4b32440837 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -176,8 +176,8 @@ typedef struct VirtIONet { uint8_t nobcast; uint8_t vhost_started; struct { - int in_use; - int first_multi; + uint32_t in_use; + uint32_t first_multi; uint8_t multi_overflow; uint8_t uni_overflow; uint8_t *macs; From eea750a5623ddac7a61982eec8f1c93481857578 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:50:56 +0300 Subject: [PATCH 05/36] virtio-net: out-of-bounds buffer write on invalid state load CVE-2013-4150 QEMU 1.5.0 out-of-bounds buffer write in virtio_net_load()@hw/net/virtio-net.c This code is in hw/net/virtio-net.c: if (n->max_queues > 1) { if (n->max_queues != qemu_get_be16(f)) { error_report("virtio-net: different max_queues "); return -1; } n->curr_queues = qemu_get_be16(f); for (i = 1; i < n->curr_queues; i++) { n->vqs[i].tx_waiting = qemu_get_be32(f); } } Number of vqs is max_queues, so if we get invalid input here, for example if max_queues = 2, curr_queues = 3, we get write beyond end of the buffer, with data that comes from wire. This might be used to corrupt qemu memory in hard to predict ways. Since we have lots of function pointers around, RCE might be possible. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Michael Roth Signed-off-by: Juan Quintela --- hw/net/virtio-net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 33bd233a2d..0a8cb40b92 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1407,6 +1407,11 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) } n->curr_queues = qemu_get_be16(f); + if (n->curr_queues > n->max_queues) { + error_report("virtio-net: curr_queues %x > max_queues %x", + n->curr_queues, n->max_queues); + return -1; + } for (i = 1; i < n->curr_queues; i++) { n->vqs[i].tx_waiting = qemu_get_be32(f); } From cc45995294b92d95319b4782750a3580cabdbc0c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:51:14 +0300 Subject: [PATCH 06/36] virtio: out-of-bounds buffer write on invalid state load CVE-2013-4151 QEMU 1.0 out-of-bounds buffer write in virtio_load@hw/virtio/virtio.c So we have this code since way back when: num = qemu_get_be32(f); for (i = 0; i < num; i++) { vdev->vq[i].vring.num = qemu_get_be32(f); array of vqs has size VIRTIO_PCI_QUEUE_MAX, so on invalid input this will write beyond end of buffer. Signed-off-by: Michael S. Tsirkin Reviewed-by: Michael Roth Signed-off-by: Juan Quintela --- hw/virtio/virtio.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index aeabf3a459..05f05e7c10 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -891,7 +891,8 @@ int virtio_set_features(VirtIODevice *vdev, uint32_t val) int virtio_load(VirtIODevice *vdev, QEMUFile *f) { - int num, i, ret; + int i, ret; + uint32_t num; uint32_t features; uint32_t supported_features; BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); @@ -919,6 +920,11 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) num = qemu_get_be32(f); + if (num > VIRTIO_PCI_QUEUE_MAX) { + error_report("Invalid number of PCI queues: 0x%x", num); + return -1; + } + for (i = 0; i < num; i++) { vdev->vq[i].vring.num = qemu_get_be32(f); if (k->has_variable_vring_alignment) { From ae2158ad6ce0845b2fae2a22aa7f19c0d7a71ce5 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:51:18 +0300 Subject: [PATCH 07/36] ahci: fix buffer overrun on invalid state load CVE-2013-4526 Within hw/ide/ahci.c, VARRAY refers to ports which is also loaded. So we use the old version of ports to read the array but then allow any value for ports. This can cause the code to overflow. There's no reason to migrate ports - it never changes. So just make sure it matches. Reported-by: Anthony Liguori Signed-off-by: Michael S. Tsirkin Reviewed-by: Peter Maydell Signed-off-by: Juan Quintela --- hw/ide/ahci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 50327ffdf1..e57c5837d2 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1293,7 +1293,7 @@ const VMStateDescription vmstate_ahci = { VMSTATE_UINT32(control_regs.impl, AHCIState), VMSTATE_UINT32(control_regs.version, AHCIState), VMSTATE_UINT32(idp_index, AHCIState), - VMSTATE_INT32(ports, AHCIState), + VMSTATE_INT32_EQUAL(ports, AHCIState), VMSTATE_END_OF_LIST() }, }; From 3f1c49e2136fa08ab1ef3183fd55def308829584 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:51:23 +0300 Subject: [PATCH 08/36] hpet: fix buffer overrun on invalid state load CVE-2013-4527 hw/timer/hpet.c buffer overrun hpet is a VARRAY with a uint8 size but static array of 32 To fix, make sure num_timers is valid using VMSTATE_VALID hook. Reported-by: Anthony Liguori Signed-off-by: Michael S. Tsirkin Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela --- hw/timer/hpet.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c index e15d6bcac7..2792f89c66 100644 --- a/hw/timer/hpet.c +++ b/hw/timer/hpet.c @@ -239,6 +239,18 @@ static int hpet_pre_load(void *opaque) return 0; } +static bool hpet_validate_num_timers(void *opaque, int version_id) +{ + HPETState *s = opaque; + + if (s->num_timers < HPET_MIN_TIMERS) { + return false; + } else if (s->num_timers > HPET_MAX_TIMERS) { + return false; + } + return true; +} + static int hpet_post_load(void *opaque, int version_id) { HPETState *s = opaque; @@ -307,6 +319,7 @@ static const VMStateDescription vmstate_hpet = { VMSTATE_UINT64(isr, HPETState), VMSTATE_UINT64(hpet_counter, HPETState), VMSTATE_UINT8_V(num_timers, HPETState, 2), + VMSTATE_VALIDATE("num_timers in range", hpet_validate_num_timers), VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0, vmstate_hpet_timer, HPETTimer), VMSTATE_END_OF_LIST() From 5f691ff91d323b6f97c6600405a7f9dc115a0ad1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:51:31 +0300 Subject: [PATCH 09/36] hw/pci/pcie_aer.c: fix buffer overruns on invalid state load 4) CVE-2013-4529 hw/pci/pcie_aer.c pcie aer log can overrun the buffer if log_num is too large There are two issues in this file: 1. log_max from remote can be larger than on local then buffer will overrun with data coming from state file. 2. log_num can be larger then we get data corruption again with an overflow but not adversary controlled. Fix both issues. Reported-by: Anthony Liguori Reported-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela --- hw/pci/pcie_aer.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c index 991502e517..535be2c08a 100644 --- a/hw/pci/pcie_aer.c +++ b/hw/pci/pcie_aer.c @@ -795,6 +795,13 @@ static const VMStateDescription vmstate_pcie_aer_err = { } }; +static bool pcie_aer_state_log_num_valid(void *opaque, int version_id) +{ + PCIEAERLog *s = opaque; + + return s->log_num <= s->log_max; +} + const VMStateDescription vmstate_pcie_aer_log = { .name = "PCIE_AER_ERROR_LOG", .version_id = 1, @@ -802,7 +809,8 @@ const VMStateDescription vmstate_pcie_aer_log = { .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT16(log_num, PCIEAERLog), - VMSTATE_UINT16(log_max, PCIEAERLog), + VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog), + VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid), VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num, vmstate_pcie_aer_err, PCIEAERErr), VMSTATE_END_OF_LIST() From d8d0a0bc7e194300e53a346d25fe5724fd588387 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:51:35 +0300 Subject: [PATCH 10/36] pl022: fix buffer overun on invalid state load CVE-2013-4530 pl022.c did not bounds check tx_fifo_head and rx_fifo_head after loading them from file and before they are used to dereference array. Reported-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Juan Quintela --- hw/ssi/pl022.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/hw/ssi/pl022.c b/hw/ssi/pl022.c index fd479effb9..b19bc7174a 100644 --- a/hw/ssi/pl022.c +++ b/hw/ssi/pl022.c @@ -240,11 +240,25 @@ static const MemoryRegionOps pl022_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; +static int pl022_post_load(void *opaque, int version_id) +{ + PL022State *s = opaque; + + if (s->tx_fifo_head < 0 || + s->tx_fifo_head >= ARRAY_SIZE(s->tx_fifo) || + s->rx_fifo_head < 0 || + s->rx_fifo_head >= ARRAY_SIZE(s->rx_fifo)) { + return -1; + } + return 0; +} + static const VMStateDescription vmstate_pl022 = { .name = "pl022_ssp", .version_id = 1, .minimum_version_id = 1, .minimum_version_id_old = 1, + .post_load = pl022_post_load, .fields = (VMStateField[]) { VMSTATE_UINT32(cr0, PL022State), VMSTATE_UINT32(cr1, PL022State), From d2ef4b61fe6d33d2a5dcf100a9b9440de341ad62 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:51:42 +0300 Subject: [PATCH 11/36] vmstate: fix buffer overflow in target-arm/machine.c CVE-2013-4531 cpreg_vmstate_indexes is a VARRAY_INT32. A negative value for cpreg_vmstate_array_len will cause a buffer overflow. VMSTATE_INT32_LE was supposed to protect against this but doesn't because it doesn't validate that input is non-negative. Fix this macro to valide the value appropriately. The only other user of VMSTATE_INT32_LE doesn't ever use negative numbers so it doesn't care. Reported-by: Anthony Liguori Signed-off-by: Michael S. Tsirkin Signed-off-by: Juan Quintela --- vmstate.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vmstate.c b/vmstate.c index f019228188..dbb76665d9 100644 --- a/vmstate.c +++ b/vmstate.c @@ -337,8 +337,9 @@ const VMStateInfo vmstate_info_int32_equal = { .put = put_int32, }; -/* 32 bit int. Check that the received value is less than or equal to - the one in the field */ +/* 32 bit int. Check that the received value is non-negative + * and less than or equal to the one in the field. + */ static int get_int32_le(QEMUFile *f, void *pv, size_t size) { @@ -346,7 +347,7 @@ static int get_int32_le(QEMUFile *f, void *pv, size_t size) int32_t loaded; qemu_get_sbe32s(f, &loaded); - if (loaded <= *cur) { + if (loaded >= 0 && loaded <= *cur) { *cur = loaded; return 0; } From 4b53c2c72cb5541cf394033b528a6fe2a86c0ac1 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 3 Apr 2014 19:51:46 +0300 Subject: [PATCH 12/36] virtio: avoid buffer overrun on incoming migration CVE-2013-6399 vdev->queue_sel is read from the wire, and later used in the emulation code as an index into vdev->vq[]. If the value of vdev->queue_sel exceeds the length of vdev->vq[], currently allocated to be VIRTIO_PCI_QUEUE_MAX elements, subsequent PIO operations such as VIRTIO_PCI_QUEUE_PFN can be used to overrun the buffer with arbitrary data originating from the source. Fix this by failing migration if the value from the wire exceeds VIRTIO_PCI_QUEUE_MAX. Signed-off-by: Michael Roth Signed-off-by: Michael S. Tsirkin Reviewed-by: Peter Maydell Signed-off-by: Juan Quintela --- hw/virtio/virtio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 05f05e7c10..007254257f 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -907,6 +907,9 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) qemu_get_8s(f, &vdev->status); qemu_get_8s(f, &vdev->isr); qemu_get_be16s(f, &vdev->queue_sel); + if (vdev->queue_sel >= VIRTIO_PCI_QUEUE_MAX) { + return -1; + } qemu_get_be32s(f, &features); if (virtio_set_features(vdev, features) < 0) { From 36cf2a37132c7f01fa9adb5f95f5312b27742fd4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:51:53 +0300 Subject: [PATCH 13/36] virtio: validate num_sg when mapping CVE-2013-4535 CVE-2013-4536 Both virtio-block and virtio-serial read, VirtQueueElements are read in as buffers, and passed to virtqueue_map_sg(), where num_sg is taken from the wire and can force writes to indicies beyond VIRTQUEUE_MAX_SIZE. To fix, validate num_sg. Reported-by: Michael Roth Signed-off-by: Michael S. Tsirkin Cc: Amit Shah Signed-off-by: Juan Quintela --- hw/virtio/virtio.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 007254257f..a70169af07 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -430,6 +430,12 @@ void virtqueue_map_sg(struct iovec *sg, hwaddr *addr, unsigned int i; hwaddr len; + if (num_sg >= VIRTQUEUE_MAX_SIZE) { + error_report("virtio: map attempt out of bounds: %zd > %d", + num_sg, VIRTQUEUE_MAX_SIZE); + exit(1); + } + for (i = 0; i < num_sg; i++) { len = sg[i].iov_len; sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write); From caa881abe0e01f9931125a0977ec33c5343e4aa7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:51:57 +0300 Subject: [PATCH 14/36] pxa2xx: avoid buffer overrun on incoming migration CVE-2013-4533 s->rx_level is read from the wire and used to determine how many bytes to subsequently read into s->rx_fifo[]. If s->rx_level exceeds the length of s->rx_fifo[] the buffer can be overrun with arbitrary data from the wire. Fix this by validating rx_level against the size of s->rx_fifo. Cc: Don Koch Reported-by: Michael Roth Signed-off-by: Michael S. Tsirkin Reviewed-by: Peter Maydell Reviewed-by: Don Koch Signed-off-by: Juan Quintela --- hw/arm/pxa2xx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c index 04291488e4..e0cd847b95 100644 --- a/hw/arm/pxa2xx.c +++ b/hw/arm/pxa2xx.c @@ -732,7 +732,7 @@ static void pxa2xx_ssp_save(QEMUFile *f, void *opaque) static int pxa2xx_ssp_load(QEMUFile *f, void *opaque, int version_id) { PXA2xxSSPState *s = (PXA2xxSSPState *) opaque; - int i; + int i, v; s->enable = qemu_get_be32(f); @@ -746,7 +746,11 @@ static int pxa2xx_ssp_load(QEMUFile *f, void *opaque, int version_id) qemu_get_8s(f, &s->ssrsa); qemu_get_8s(f, &s->ssacd); - s->rx_level = qemu_get_byte(f); + v = qemu_get_byte(f); + if (v < 0 || v > ARRAY_SIZE(s->rx_fifo)) { + return -EINVAL; + } + s->rx_level = v; s->rx_start = 0; for (i = 0; i < s->rx_level; i ++) s->rx_fifo[i] = qemu_get_byte(f); From ead7a57df37d2187813a121308213f41591bd811 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:52:05 +0300 Subject: [PATCH 15/36] ssd0323: fix buffer overun on invalid state load CVE-2013-4538 s->cmd_len used as index in ssd0323_transfer() to store 32-bit field. Possible this field might then be supplied by guest to overwrite a return addr somewhere. Same for row/col fields, which are indicies into framebuffer array. To fix validate after load. Additionally, validate that the row/col_start/end are within bounds; otherwise the guest can provoke an overrun by either setting the _end field so large that the row++ increments just walk off the end of the array, or by setting the _start value to something bogus and then letting the "we hit end of row" logic reset row to row_start. For completeness, validate mode as well. Signed-off-by: Michael S. Tsirkin Reviewed-by: Peter Maydell Signed-off-by: Juan Quintela --- hw/display/ssd0323.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/hw/display/ssd0323.c b/hw/display/ssd0323.c index 971152edbd..97270077e2 100644 --- a/hw/display/ssd0323.c +++ b/hw/display/ssd0323.c @@ -312,18 +312,42 @@ static int ssd0323_load(QEMUFile *f, void *opaque, int version_id) return -EINVAL; s->cmd_len = qemu_get_be32(f); + if (s->cmd_len < 0 || s->cmd_len > ARRAY_SIZE(s->cmd_data)) { + return -EINVAL; + } s->cmd = qemu_get_be32(f); for (i = 0; i < 8; i++) s->cmd_data[i] = qemu_get_be32(f); s->row = qemu_get_be32(f); + if (s->row < 0 || s->row >= 80) { + return -EINVAL; + } s->row_start = qemu_get_be32(f); + if (s->row_start < 0 || s->row_start >= 80) { + return -EINVAL; + } s->row_end = qemu_get_be32(f); + if (s->row_end < 0 || s->row_end >= 80) { + return -EINVAL; + } s->col = qemu_get_be32(f); + if (s->col < 0 || s->col >= 64) { + return -EINVAL; + } s->col_start = qemu_get_be32(f); + if (s->col_start < 0 || s->col_start >= 64) { + return -EINVAL; + } s->col_end = qemu_get_be32(f); + if (s->col_end < 0 || s->col_end >= 64) { + return -EINVAL; + } s->redraw = qemu_get_be32(f); s->remap = qemu_get_be32(f); s->mode = qemu_get_be32(f); + if (s->mode != SSD0323_CMD && s->mode != SSD0323_DATA) { + return -EINVAL; + } qemu_get_buffer(f, s->framebuffer, sizeof(s->framebuffer)); ss->cs = qemu_get_be32(f); From 5193be3be35f29a35bc465036cd64ad60d43385f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:52:09 +0300 Subject: [PATCH 16/36] tsc210x: fix buffer overrun on invalid state load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CVE-2013-4539 s->precision, nextprecision, function and nextfunction come from wire and are used as idx into resolution[] in TSC_CUT_RESOLUTION. Validate after load to avoid buffer overrun. Cc: Andreas Färber Signed-off-by: Michael S. Tsirkin Signed-off-by: Juan Quintela --- hw/input/tsc210x.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hw/input/tsc210x.c b/hw/input/tsc210x.c index 485c9e5753..aa5b6886ea 100644 --- a/hw/input/tsc210x.c +++ b/hw/input/tsc210x.c @@ -1070,9 +1070,21 @@ static int tsc210x_load(QEMUFile *f, void *opaque, int version_id) s->enabled = qemu_get_byte(f); s->host_mode = qemu_get_byte(f); s->function = qemu_get_byte(f); + if (s->function < 0 || s->function >= ARRAY_SIZE(mode_regs)) { + return -EINVAL; + } s->nextfunction = qemu_get_byte(f); + if (s->nextfunction < 0 || s->nextfunction >= ARRAY_SIZE(mode_regs)) { + return -EINVAL; + } s->precision = qemu_get_byte(f); + if (s->precision < 0 || s->precision >= ARRAY_SIZE(resolution)) { + return -EINVAL; + } s->nextprecision = qemu_get_byte(f); + if (s->nextprecision < 0 || s->nextprecision >= ARRAY_SIZE(resolution)) { + return -EINVAL; + } s->filter = qemu_get_byte(f); s->pin_func = qemu_get_byte(f); s->ref = qemu_get_byte(f); From 52f91c3723932f8340fe36c8ec8b18a757c37b2b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:52:13 +0300 Subject: [PATCH 17/36] zaurus: fix buffer overrun on invalid state load CVE-2013-4540 Within scoop_gpio_handler_update, if prev_level has a high bit set, then we get bit > 16 and that causes a buffer overrun. Since prev_level comes from wire indirectly, this can happen on invalid state load. Similarly for gpio_level and gpio_dir. To fix, limit to 16 bit. Reported-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela --- hw/gpio/zaurus.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hw/gpio/zaurus.c b/hw/gpio/zaurus.c index dc79a8baa6..8e2ce049de 100644 --- a/hw/gpio/zaurus.c +++ b/hw/gpio/zaurus.c @@ -203,6 +203,15 @@ static bool is_version_0 (void *opaque, int version_id) return version_id == 0; } +static bool vmstate_scoop_validate(void *opaque, int version_id) +{ + ScoopInfo *s = opaque; + + return !(s->prev_level & 0xffff0000) && + !(s->gpio_level & 0xffff0000) && + !(s->gpio_dir & 0xffff0000); +} + static const VMStateDescription vmstate_scoop_regs = { .name = "scoop", .version_id = 1, @@ -215,6 +224,7 @@ static const VMStateDescription vmstate_scoop_regs = { VMSTATE_UINT32(gpio_level, ScoopInfo), VMSTATE_UINT32(gpio_dir, ScoopInfo), VMSTATE_UINT32(prev_level, ScoopInfo), + VMSTATE_VALIDATE("irq levels are 16 bit", vmstate_scoop_validate), VMSTATE_UINT16(mcr, ScoopInfo), VMSTATE_UINT16(cdr, ScoopInfo), VMSTATE_UINT16(ccr, ScoopInfo), From 3c3ce981423e0d6c18af82ee62f1850c2cda5976 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:52:17 +0300 Subject: [PATCH 18/36] virtio-scsi: fix buffer overrun on invalid state load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CVE-2013-4542 hw/scsi/scsi-bus.c invokes load_request. virtio_scsi_load_request does: qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem)); this probably can make elem invalid, for example, make in_num or out_num huge, then: virtio_scsi_parse_req(s, vs->cmd_vqs[n], req); will do: if (req->elem.out_num > 1) { qemu_sgl_init_external(req, &req->elem.out_sg[1], &req->elem.out_addr[1], req->elem.out_num - 1); } else { qemu_sgl_init_external(req, &req->elem.in_sg[1], &req->elem.in_addr[1], req->elem.in_num - 1); } and this will access out of array bounds. Note: this adds security checks within assert calls since SCSIBusInfo's load_request cannot fail. For now simply disable builds with NDEBUG - there seems to be little value in supporting these. Cc: Andreas Färber Signed-off-by: Michael S. Tsirkin Signed-off-by: Juan Quintela --- hw/scsi/virtio-scsi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index b0d7517cb1..175219376c 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -147,6 +147,15 @@ static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq) qemu_get_be32s(f, &n); assert(n < vs->conf.num_queues); qemu_get_buffer(f, (unsigned char *)&req->elem, sizeof(req->elem)); + /* TODO: add a way for SCSIBusInfo's load_request to fail, + * and fail migration instead of asserting here. + * When we do, we might be able to re-enable NDEBUG below. + */ +#ifdef NDEBUG +#error building with NDEBUG is not supported +#endif + assert(req->elem.in_num <= ARRAY_SIZE(req->elem.in_sg)); + assert(req->elem.out_num <= ARRAY_SIZE(req->elem.out_sg)); virtio_scsi_parse_req(s, vs->cmd_vqs[n], req); scsi_req_ref(sreq); From 3476436a44c29725efef0cabf5b3ea4e70054d57 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:52:21 +0300 Subject: [PATCH 19/36] vmstate: s/VMSTATE_INT32_LE/VMSTATE_INT32_POSITIVE_LE/ As the macro verifies the value is positive, rename it to make the function clearer. Signed-off-by: Michael S. Tsirkin Signed-off-by: Juan Quintela --- hw/pci/pci.c | 4 ++-- include/migration/vmstate.h | 2 +- target-arm/machine.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 2a9f08eb0a..517ff2a21b 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -475,7 +475,7 @@ const VMStateDescription vmstate_pci_device = { .minimum_version_id = 1, .minimum_version_id_old = 1, .fields = (VMStateField []) { - VMSTATE_INT32_LE(version_id, PCIDevice), + VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice), VMSTATE_BUFFER_UNSAFE_INFO(config, PCIDevice, 0, vmstate_info_pci_config, PCI_CONFIG_SPACE_SIZE), @@ -492,7 +492,7 @@ const VMStateDescription vmstate_pcie_device = { .minimum_version_id = 1, .minimum_version_id_old = 1, .fields = (VMStateField []) { - VMSTATE_INT32_LE(version_id, PCIDevice), + VMSTATE_INT32_POSITIVE_LE(version_id, PCIDevice), VMSTATE_BUFFER_UNSAFE_INFO(config, PCIDevice, 0, vmstate_info_pci_config, PCIE_CONFIG_SPACE_SIZE), diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 5b7137058d..7e45048355 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -601,7 +601,7 @@ extern const VMStateInfo vmstate_info_bitmap; #define VMSTATE_UINT64_EQUAL(_f, _s) \ VMSTATE_UINT64_EQUAL_V(_f, _s, 0) -#define VMSTATE_INT32_LE(_f, _s) \ +#define VMSTATE_INT32_POSITIVE_LE(_f, _s) \ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t) #define VMSTATE_UINT8_TEST(_f, _s, _t) \ diff --git a/target-arm/machine.c b/target-arm/machine.c index b967223fc0..810ba27f40 100644 --- a/target-arm/machine.c +++ b/target-arm/machine.c @@ -248,7 +248,7 @@ const VMStateDescription vmstate_arm_cpu = { /* The length-check must come before the arrays to avoid * incoming data possibly overflowing the array. */ - VMSTATE_INT32_LE(cpreg_vmstate_array_len, ARMCPU), + VMSTATE_INT32_POSITIVE_LE(cpreg_vmstate_array_len, ARMCPU), VMSTATE_VARRAY_INT32(cpreg_vmstate_indexes, ARMCPU, cpreg_vmstate_array_len, 0, vmstate_info_uint64, uint64_t), From 9f8e9895c504149d7048e9fc5eb5cbb34b16e49a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 3 Apr 2014 19:52:25 +0300 Subject: [PATCH 20/36] usb: sanity check setup_index+setup_len in post_load CVE-2013-4541 s->setup_len and s->setup_index are fed into usb_packet_copy as size/offset into s->data_buf, it's possible for invalid state to exploit this to load arbitrary data. setup_len and setup_index should be checked to make sure they are not negative. Cc: Gerd Hoffmann Signed-off-by: Michael S. Tsirkin Reviewed-by: Gerd Hoffmann Signed-off-by: Juan Quintela --- hw/usb/bus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/usb/bus.c b/hw/usb/bus.c index fe70429304..e48b19fc29 100644 --- a/hw/usb/bus.c +++ b/hw/usb/bus.c @@ -49,7 +49,9 @@ static int usb_device_post_load(void *opaque, int version_id) } else { dev->attached = 1; } - if (dev->setup_index >= sizeof(dev->data_buf) || + if (dev->setup_index < 0 || + dev->setup_len < 0 || + dev->setup_index >= sizeof(dev->data_buf) || dev->setup_len >= sizeof(dev->data_buf)) { return -EINVAL; } From 767adce2d9cd397de3418caa16be35ea18d56f22 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 3 Apr 2014 19:52:28 +0300 Subject: [PATCH 21/36] savevm: Ignore minimum_version_id_old if there is no load_state_old At the moment we require vmstate definitions to set minimum_version_id_old to the same value as minimum_version_id if they do not provide a load_state_old handler. Since the load_state_old functionality is required only for a handful of devices that need to retain migration compatibility with a pre-vmstate implementation, this means the bulk of devices have pointless boilerplate. Relax the definition so that minimum_version_id_old is ignored if there is no load_state_old handler. Note that under the old scheme we would segfault if the vmstate specified a minimum_version_id_old that was less than minimum_version_id but did not provide a load_state_old function, and the incoming state specified a version number between minimum_version_id_old and minimum_version_id. Under the new scheme this will just result in our failing the migration. Signed-off-by: Peter Maydell Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Juan Quintela --- docs/migration.txt | 12 +++++------- vmstate.c | 9 +++++---- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/docs/migration.txt b/docs/migration.txt index 0e0a1d44da..fe1f2bb738 100644 --- a/docs/migration.txt +++ b/docs/migration.txt @@ -139,7 +139,6 @@ static const VMStateDescription vmstate_kbd = { .name = "pckbd", .version_id = 3, .minimum_version_id = 3, - .minimum_version_id_old = 3, .fields = (VMStateField []) { VMSTATE_UINT8(write_cmd, KBDState), VMSTATE_UINT8(status, KBDState), @@ -168,12 +167,13 @@ You can see that there are several version fields: - minimum_version_id: the minimum version_id that VMState is able to understand for that device. - minimum_version_id_old: For devices that were not able to port to vmstate, we can - assign a function that knows how to read this old state. + assign a function that knows how to read this old state. This field is + ignored if there is no load_state_old handler. So, VMState is able to read versions from minimum_version_id to -version_id. And the function load_state_old() is able to load state -from minimum_version_id_old to minimum_version_id. This function is -deprecated and will be removed when no more users are left. +version_id. And the function load_state_old() (if present) is able to +load state from minimum_version_id_old to minimum_version_id. This +function is deprecated and will be removed when no more users are left. === Massaging functions === @@ -255,7 +255,6 @@ const VMStateDescription vmstate_ide_drive_pio_state = { .name = "ide_drive/pio_state", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .pre_save = ide_drive_pio_pre_save, .post_load = ide_drive_pio_post_load, .fields = (VMStateField []) { @@ -275,7 +274,6 @@ const VMStateDescription vmstate_ide_drive = { .name = "ide_drive", .version_id = 3, .minimum_version_id = 0, - .minimum_version_id_old = 0, .post_load = ide_drive_post_load, .fields = (VMStateField []) { .... several fields .... diff --git a/vmstate.c b/vmstate.c index dbb76665d9..b5882fae67 100644 --- a/vmstate.c +++ b/vmstate.c @@ -63,11 +63,12 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, if (version_id > vmsd->version_id) { return -EINVAL; } - if (version_id < vmsd->minimum_version_id_old) { - return -EINVAL; - } if (version_id < vmsd->minimum_version_id) { - return vmsd->load_state_old(f, opaque, version_id); + if (vmsd->load_state_old && + version_id >= vmsd->minimum_version_id_old) { + return vmsd->load_state_old(f, opaque, version_id); + } + return -EINVAL; } if (vmsd->pre_load) { int ret = vmsd->pre_load(opaque); From a9c380db3b8c6af19546a68145c8d1438a09c92b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 28 Apr 2014 16:08:14 +0300 Subject: [PATCH 22/36] ssi-sd: fix buffer overrun on invalid state load CVE-2013-4537 s->arglen is taken from wire and used as idx in ssi_sd_transfer(). Validate it before access. Signed-off-by: Michael S. Tsirkin Signed-off-by: Juan Quintela --- hw/sd/ssi-sd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c index 3273c8a31f..b012e57f64 100644 --- a/hw/sd/ssi-sd.c +++ b/hw/sd/ssi-sd.c @@ -230,8 +230,17 @@ static int ssi_sd_load(QEMUFile *f, void *opaque, int version_id) for (i = 0; i < 5; i++) s->response[i] = qemu_get_be32(f); s->arglen = qemu_get_be32(f); + if (s->mode == SSI_SD_CMDARG && + (s->arglen < 0 || s->arglen >= ARRAY_SIZE(s->cmdarg))) { + return -EINVAL; + } s->response_pos = qemu_get_be32(f); s->stopping = qemu_get_be32(f); + if (s->mode == SSI_SD_RESPONSE && + (s->response_pos < 0 || s->response_pos >= ARRAY_SIZE(s->response) || + (!s->stopping && s->arglen > ARRAY_SIZE(s->response)))) { + return -EINVAL; + } ss->cs = qemu_get_be32(f); From 73d963c0a75cb99c6aaa3f6f25e427aa0b35a02e Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Mon, 28 Apr 2014 16:08:17 +0300 Subject: [PATCH 23/36] openpic: avoid buffer overrun on incoming migration CVE-2013-4534 opp->nb_cpus is read from the wire and used to determine how many IRQDest elements to read into opp->dst[]. If the value exceeds the length of opp->dst[], MAX_CPU, opp->dst[] can be overrun with arbitrary data from the wire. Fix this by failing migration if the value read from the wire exceeds MAX_CPU. Signed-off-by: Michael Roth Reviewed-by: Alexander Graf Signed-off-by: Michael S. Tsirkin Signed-off-by: Juan Quintela --- hw/intc/openpic.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c index be76fbd78f..17136c9333 100644 --- a/hw/intc/openpic.c +++ b/hw/intc/openpic.c @@ -41,6 +41,7 @@ #include "hw/sysbus.h" #include "hw/pci/msi.h" #include "qemu/bitops.h" +#include "qapi/qmp/qerror.h" //#define DEBUG_OPENPIC @@ -1416,7 +1417,7 @@ static void openpic_load_IRQ_queue(QEMUFile* f, IRQQueue *q) static int openpic_load(QEMUFile* f, void *opaque, int version_id) { OpenPICState *opp = (OpenPICState *)opaque; - unsigned int i; + unsigned int i, nb_cpus; if (version_id != 1) { return -EINVAL; @@ -1428,7 +1429,11 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id) qemu_get_be32s(f, &opp->spve); qemu_get_be32s(f, &opp->tfrr); - qemu_get_be32s(f, &opp->nb_cpus); + qemu_get_be32s(f, &nb_cpus); + if (opp->nb_cpus != nb_cpus) { + return -EINVAL; + } + assert(nb_cpus > 0 && nb_cpus <= MAX_CPU); for (i = 0; i < opp->nb_cpus; i++) { qemu_get_sbe32s(f, &opp->dst[i].ctpr); @@ -1567,6 +1572,13 @@ static void openpic_realize(DeviceState *dev, Error **errp) {NULL} }; + if (opp->nb_cpus > MAX_CPU) { + error_set(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE, + TYPE_OPENPIC, "nb_cpus", (uint64_t)opp->nb_cpus, + (uint64_t)0, (uint64_t)MAX_CPU); + return; + } + switch (opp->model) { case OPENPIC_MODEL_FSL_MPIC_20: default: From 98f93ddd84800f207889491e0b5d851386b459cf Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 28 Apr 2014 16:08:21 +0300 Subject: [PATCH 24/36] virtio-net: out-of-bounds buffer write on load CVE-2013-4149 QEMU 1.3.0 out-of-bounds buffer write in virtio_net_load()@hw/net/virtio-net.c > } else if (n->mac_table.in_use) { > uint8_t *buf = g_malloc0(n->mac_table.in_use); We are allocating buffer of size n->mac_table.in_use > qemu_get_buffer(f, buf, n->mac_table.in_use * ETH_ALEN); and read to the n->mac_table.in_use size buffer n->mac_table.in_use * ETH_ALEN bytes, corrupting memory. If adversary controls state then memory written there is controlled by adversary. Reviewed-by: Michael Roth Signed-off-by: Michael S. Tsirkin Signed-off-by: Juan Quintela --- hw/net/virtio-net.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 0a8cb40b92..940a7cfe54 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1362,10 +1362,17 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) { qemu_get_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN); - } else if (n->mac_table.in_use) { - uint8_t *buf = g_malloc0(n->mac_table.in_use); - qemu_get_buffer(f, buf, n->mac_table.in_use * ETH_ALEN); - g_free(buf); + } else { + int64_t i; + + /* Overflow detected - can happen if source has a larger MAC table. + * We simply set overflow flag so there's no need to maintain the + * table of addresses, discard them all. + * Note: 64 bit math to avoid integer overflow. + */ + for (i = 0; i < (int64_t)n->mac_table.in_use * ETH_ALEN; ++i) { + qemu_get_byte(f); + } n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1; n->mac_table.in_use = 0; } From a890a2f9137ac3cf5b607649e66a6f3a5512d8dc Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 28 Apr 2014 16:08:23 +0300 Subject: [PATCH 25/36] virtio: validate config_len on load Malformed input can have config_len in migration stream exceed the array size allocated on destination, the result will be heap overflow. To fix, that config_len matches on both sides. CVE-2014-0182 Reported-by: "Dr. David Alan Gilbert" Signed-off-by: Michael S. Tsirkin Signed-off-by: Juan Quintela -- v2: use %ix and %zx to print config_len values Signed-off-by: Juan Quintela --- hw/virtio/virtio.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index a70169af07..7f4e7eca0e 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -898,6 +898,7 @@ int virtio_set_features(VirtIODevice *vdev, uint32_t val) int virtio_load(VirtIODevice *vdev, QEMUFile *f) { int i, ret; + int32_t config_len; uint32_t num; uint32_t features; uint32_t supported_features; @@ -924,7 +925,12 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f) features, supported_features); return -1; } - vdev->config_len = qemu_get_be32(f); + config_len = qemu_get_be32(f); + if (config_len != vdev->config_len) { + error_report("Unexpected config length 0x%x. Expected 0x%zx", + config_len, vdev->config_len); + return -1; + } qemu_get_buffer(f, vdev->config, vdev->config_len); num = qemu_get_be32(f); From ca99993adc9205c905dba5dc1bb819959ada7200 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 14 Apr 2014 17:03:59 +0100 Subject: [PATCH 26/36] Disallow outward migration while awaiting incoming migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QEMU will assert if you attempt to start an outgoing migration on a QEMU that's sitting waiting for an incoming migration (started with -incoming), so disallow it with a proper error. (This is a fix for https://bugzilla.redhat.com/show_bug.cgi?id=1086987 ) Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Andreas Färber Reviewed-by: Eric Blake Signed-off-by: Juan Quintela --- migration.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/migration.c b/migration.c index bd1fb912ae..ac232758b9 100644 --- a/migration.c +++ b/migration.c @@ -419,6 +419,11 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, return; } + if (runstate_check(RUN_STATE_INMIGRATE)) { + error_setg(errp, "Guest is waiting for an incoming migration"); + return; + } + if (qemu_savevm_state_blocked(errp)) { return; } From 548f52ea06951c20f0b91cae6cde0512ec073c83 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Tue, 8 Apr 2014 15:29:37 +0100 Subject: [PATCH 27/36] Make qemu_peek_buffer loop until it gets it's data Make qemu_peek_buffer repeatedly call fill_buffer until it gets all the data it requires, or until there is an error. At the moment, qemu_peek_buffer will try one qemu_fill_buffer if there isn't enough data waiting, however the kernel is entitled to return just a few bytes, and still leave qemu_peek_buffer with less bytes than it needed. I've seen this fail in a dev world, and I think it could theoretically fail in the peeking of the subsection headers in the current world. Comment qemu_peek_byte to point out it's not guaranteed to work for non-continuous peeks Signed-off-by: Dr. David Alan Gilbert Reviewed-by: ChenLiang Signed-off-by: Juan Quintela --- include/migration/qemu-file.h | 5 ++++ qemu-file.c | 53 ++++++++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index a191fb6d8d..c90f5298ab 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -123,6 +123,11 @@ void qemu_put_be32(QEMUFile *f, unsigned int v); void qemu_put_be64(QEMUFile *f, uint64_t v); int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset); int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size); +/* + * Note that you can only peek continuous bytes from where the current pointer + * is; you aren't guaranteed to be able to peak to +n bytes unless you've + * previously peeked +n-1. + */ int qemu_peek_byte(QEMUFile *f, int offset); int qemu_get_byte(QEMUFile *f); void qemu_file_skip(QEMUFile *f, int size); diff --git a/qemu-file.c b/qemu-file.c index 8d5f45dcb0..a8e39127f2 100644 --- a/qemu-file.c +++ b/qemu-file.c @@ -530,7 +530,15 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, return RAM_SAVE_CONTROL_NOT_SUPP; } -static void qemu_fill_buffer(QEMUFile *f) +/* + * Attempt to fill the buffer from the underlying file + * Returns the number of bytes read, or negative value for an error. + * + * Note that it can return a partially full buffer even in a not error/not EOF + * case if the underlying file descriptor gives a short read, and that can + * happen even on a blocking fd. + */ +static ssize_t qemu_fill_buffer(QEMUFile *f) { int len; int pending; @@ -554,6 +562,8 @@ static void qemu_fill_buffer(QEMUFile *f) } else if (len != -EAGAIN) { qemu_file_set_error(f, len); } + + return len; } int qemu_get_fd(QEMUFile *f) @@ -685,17 +695,39 @@ void qemu_file_skip(QEMUFile *f, int size) } } +/* + * Read 'size' bytes from file (at 'offset') into buf without moving the + * pointer. + * + * It will return size bytes unless there was an error, in which case it will + * return as many as it managed to read (assuming blocking fd's which + * all current QEMUFile are) + */ int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset) { int pending; int index; assert(!qemu_file_is_writable(f)); + assert(offset < IO_BUF_SIZE); + assert(size <= IO_BUF_SIZE - offset); + /* The 1st byte to read from */ index = f->buf_index + offset; + /* The number of available bytes starting at index */ pending = f->buf_size - index; - if (pending < size) { - qemu_fill_buffer(f); + + /* + * qemu_fill_buffer might return just a few bytes, even when there isn't + * an error, so loop collecting them until we get enough. + */ + while (pending < size) { + int received = qemu_fill_buffer(f); + + if (received <= 0) { + break; + } + index = f->buf_index + offset; pending = f->buf_size - index; } @@ -711,6 +743,14 @@ int qemu_peek_buffer(QEMUFile *f, uint8_t *buf, int size, size_t offset) return size; } +/* + * Read 'size' bytes of data from the file into buf. + * 'size' can be larger than the internal buffer. + * + * It will return size bytes unless there was an error, in which case it will + * return as many as it managed to read (assuming blocking fd's which + * all current QEMUFile are) + */ int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size) { int pending = size; @@ -719,7 +759,7 @@ int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size) while (pending > 0) { int res; - res = qemu_peek_buffer(f, buf, pending, 0); + res = qemu_peek_buffer(f, buf, MIN(pending, IO_BUF_SIZE), 0); if (res == 0) { return done; } @@ -731,11 +771,16 @@ int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size) return done; } +/* + * Peeks a single byte from the buffer; this isn't guaranteed to work if + * offset leaves a gap after the previous read/peeked data. + */ int qemu_peek_byte(QEMUFile *f, int offset) { int index = f->buf_index + offset; assert(!qemu_file_is_writable(f)); + assert(offset < IO_BUF_SIZE); if (index >= f->buf_size) { qemu_fill_buffer(f); From e30d1d8c7195848abb28a8c734a82b845b8b456a Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 27 Mar 2014 15:01:48 +0000 Subject: [PATCH 28/36] Count used RAMBlock pages for migration_dirty_pages This is a fix for a bug* triggered by a migration after hot unplugging a few virtio-net NICs, that caused migration never to converge, because 'migration_dirty_pages' is incorrectly initialised. 'migration_dirty_pages' is used as a tally of the number of outstanding dirty pages, to give the migration code an idea of how much more data will need to be transferred, and thus whether it can end the iterative phase. It was initialised to the total size of the RAMBlock address space, however hotunplug can leave this space sparse, and hence migration_dirty_pages ended up too large. Signed-off-by: Dr. David Alan Gilbert (* https://bugzilla.redhat.com/show_bug.cgi?id=1074913 ) Signed-off-by: Juan Quintela --- arch_init.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/arch_init.c b/arch_init.c index 60c975db2b..0c8c07d6ba 100644 --- a/arch_init.c +++ b/arch_init.c @@ -726,11 +726,8 @@ static void reset_ram_globals(void) static int ram_save_setup(QEMUFile *f, void *opaque) { RAMBlock *block; - int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; + int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ - migration_bitmap = bitmap_new(ram_pages); - bitmap_set(migration_bitmap, 0, ram_pages); - migration_dirty_pages = ram_pages; mig_throttle_on = false; dirty_rate_high_cnt = 0; @@ -770,6 +767,22 @@ static int ram_save_setup(QEMUFile *f, void *opaque) bytes_transferred = 0; reset_ram_globals(); + ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS; + migration_bitmap = bitmap_new(ram_bitmap_pages); + bitmap_set(migration_bitmap, 0, ram_bitmap_pages); + + /* + * Count the total number of pages used by ram blocks not including any + * gaps due to alignment or unplugs. + */ + migration_dirty_pages = 0; + QTAILQ_FOREACH(block, &ram_list.blocks, next) { + uint64_t block_pages; + + block_pages = block->length >> TARGET_PAGE_BITS; + migration_dirty_pages += block_pages; + } + memory_global_dirty_log_start(); migration_bitmap_sync(); qemu_mutex_unlock_iothread(); From 0d6ab3ab9149767eba192ec5ad659fd34e55a291 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 19 Mar 2014 18:32:30 +0000 Subject: [PATCH 29/36] Provide init function for ram migration Provide ram_mig_init (like blk_mig_init) for vl.c to initialise stuff to do with ram migration (currently in arch_init.c). Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Gonglei Reviewed-by: Markus Armbruster Signed-off-by: Juan Quintela --- arch_init.c | 7 ++++++- include/migration/migration.h | 2 -- include/sysemu/arch_init.h | 1 + vl.c | 3 +-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch_init.c b/arch_init.c index 0c8c07d6ba..aeebb8ecb2 100644 --- a/arch_init.c +++ b/arch_init.c @@ -1108,7 +1108,7 @@ done: return ret; } -SaveVMHandlers savevm_ram_handlers = { +static SaveVMHandlers savevm_ram_handlers = { .save_live_setup = ram_save_setup, .save_live_iterate = ram_save_iterate, .save_live_complete = ram_save_complete, @@ -1117,6 +1117,11 @@ SaveVMHandlers savevm_ram_handlers = { .cancel = ram_migration_cancel, }; +void ram_mig_init(void) +{ + register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); +} + struct soundhw { const char *name; const char *descr; diff --git a/include/migration/migration.h b/include/migration/migration.h index 3e1e6c72bf..31fbf174d2 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -113,8 +113,6 @@ void free_xbzrle_decoded_buf(void); void acct_update_position(QEMUFile *f, size_t size, bool zero); -extern SaveVMHandlers savevm_ram_handlers; - uint64_t dup_mig_bytes_transferred(void); uint64_t dup_mig_pages_transferred(void); uint64_t skipped_mig_bytes_transferred(void); diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h index be71bcac2d..182d48d8c3 100644 --- a/include/sysemu/arch_init.h +++ b/include/sysemu/arch_init.h @@ -29,6 +29,7 @@ extern const uint32_t arch_type; void select_soundhw(const char *optarg); void do_acpitable_option(const QemuOpts *opts); void do_smbios_option(QemuOpts *opts); +void ram_mig_init(void); void cpudef_init(void); void audio_init(void); int tcg_available(void); diff --git a/vl.c b/vl.c index 236f95efd7..8411a4a08a 100644 --- a/vl.c +++ b/vl.c @@ -4306,6 +4306,7 @@ int main(int argc, char **argv, char **envp) cpu_exec_init_all(); blk_mig_init(); + ram_mig_init(); /* open the virtual block devices */ if (snapshot) @@ -4320,8 +4321,6 @@ int main(int argc, char **argv, char **envp) default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); - register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); - if (nb_numa_nodes > 0) { int i; From d97326eec2ca1313eaf0b5cffd69af5663b5af5d Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 19 Mar 2014 18:32:31 +0000 Subject: [PATCH 30/36] Init the XBZRLE.lock in ram_mig_init Initialising the XBZRLE.lock earlier simplifies the lock use. Based on Markus's patch in: http://lists.gnu.org/archive/html/qemu-devel/2014-03/msg03879.html Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Gonglei Reviewed-by: Markus Armbruster Signed-off-by: Juan Quintela --- arch_init.c | 61 +++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/arch_init.c b/arch_init.c index aeebb8ecb2..5e2022857a 100644 --- a/arch_init.c +++ b/arch_init.c @@ -45,6 +45,7 @@ #include "hw/audio/pcspk.h" #include "migration/page_cache.h" #include "qemu/config-file.h" +#include "qemu/error-report.h" #include "qmp-commands.h" #include "trace.h" #include "exec/cpu-all.h" @@ -167,11 +168,8 @@ static struct { /* Cache for XBZRLE, Protected by lock. */ PageCache *cache; QemuMutex lock; -} XBZRLE = { - .encoded_buf = NULL, - .current_buf = NULL, - .cache = NULL, -}; +} XBZRLE; + /* buffer used for XBZRLE decoding */ static uint8_t *xbzrle_decoded_buf; @@ -187,41 +185,44 @@ static void XBZRLE_cache_unlock(void) qemu_mutex_unlock(&XBZRLE.lock); } +/* + * called from qmp_migrate_set_cache_size in main thread, possibly while + * a migration is in progress. + * A running migration maybe using the cache and might finish during this + * call, hence changes to the cache are protected by XBZRLE.lock(). + */ int64_t xbzrle_cache_resize(int64_t new_size) { - PageCache *new_cache, *cache_to_free; + PageCache *new_cache; + int64_t ret; if (new_size < TARGET_PAGE_SIZE) { return -1; } - /* no need to lock, the current thread holds qemu big lock */ + XBZRLE_cache_lock(); + if (XBZRLE.cache != NULL) { - /* check XBZRLE.cache again later */ if (pow2floor(new_size) == migrate_xbzrle_cache_size()) { - return pow2floor(new_size); + goto out_new_size; } new_cache = cache_init(new_size / TARGET_PAGE_SIZE, TARGET_PAGE_SIZE); if (!new_cache) { - DPRINTF("Error creating cache\n"); - return -1; + error_report("Error creating cache"); + ret = -1; + goto out; } - XBZRLE_cache_lock(); - /* the XBZRLE.cache may have be destroyed, check it again */ - if (XBZRLE.cache != NULL) { - cache_to_free = XBZRLE.cache; - XBZRLE.cache = new_cache; - } else { - cache_to_free = new_cache; - } - XBZRLE_cache_unlock(); - - cache_fini(cache_to_free); + cache_fini(XBZRLE.cache); + XBZRLE.cache = new_cache; } - return pow2floor(new_size); +out_new_size: + ret = pow2floor(new_size); +out: + XBZRLE_cache_unlock(); + return ret; } /* accounting for migration statistics */ @@ -732,28 +733,27 @@ static int ram_save_setup(QEMUFile *f, void *opaque) dirty_rate_high_cnt = 0; if (migrate_use_xbzrle()) { - qemu_mutex_lock_iothread(); + XBZRLE_cache_lock(); XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / TARGET_PAGE_SIZE, TARGET_PAGE_SIZE); if (!XBZRLE.cache) { - qemu_mutex_unlock_iothread(); - DPRINTF("Error creating cache\n"); + XBZRLE_cache_unlock(); + error_report("Error creating cache"); return -1; } - qemu_mutex_init(&XBZRLE.lock); - qemu_mutex_unlock_iothread(); + XBZRLE_cache_unlock(); /* We prefer not to abort if there is no memory */ XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE); if (!XBZRLE.encoded_buf) { - DPRINTF("Error allocating encoded_buf\n"); + error_report("Error allocating encoded_buf"); return -1; } XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE); if (!XBZRLE.current_buf) { - DPRINTF("Error allocating current_buf\n"); + error_report("Error allocating current_buf"); g_free(XBZRLE.encoded_buf); XBZRLE.encoded_buf = NULL; return -1; @@ -1119,6 +1119,7 @@ static SaveVMHandlers savevm_ram_handlers = { void ram_mig_init(void) { + qemu_mutex_init(&XBZRLE.lock); register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); } From d99598cc9929ad6993ad3d19d9b1ec1d891f0d7f Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 19 Mar 2014 13:34:28 +0000 Subject: [PATCH 31/36] Coverity: Fix failure path for qemu_accept in migration Coverity defects 1005733 & 1005734 complain about passing a negative value to closesocket in the error paths on incoming migration. Stash the error value and print it in the message (previously we gave no indication of the reason for the failure) Use error_report Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Markus Armbruster Signed-off-by: Juan Quintela --- migration-tcp.c | 17 +++++++++++------ migration-unix.c | 17 +++++++++++------ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/migration-tcp.c b/migration-tcp.c index 782572de82..2e34517bb9 100644 --- a/migration-tcp.c +++ b/migration-tcp.c @@ -13,7 +13,10 @@ * GNU GPL, version 2 or (at your option) any later version. */ +#include + #include "qemu-common.h" +#include "qemu/error-report.h" #include "qemu/sockets.h" #include "migration/migration.h" #include "migration/qemu-file.h" @@ -56,24 +59,26 @@ static void tcp_accept_incoming_migration(void *opaque) socklen_t addrlen = sizeof(addr); int s = (intptr_t)opaque; QEMUFile *f; - int c; + int c, err; do { c = qemu_accept(s, (struct sockaddr *)&addr, &addrlen); - } while (c == -1 && socket_error() == EINTR); + err = socket_error(); + } while (c < 0 && err == EINTR); qemu_set_fd_handler2(s, NULL, NULL, NULL, NULL); closesocket(s); DPRINTF("accepted migration\n"); - if (c == -1) { - fprintf(stderr, "could not accept migration connection\n"); - goto out; + if (c < 0) { + error_report("could not accept migration connection (%s)", + strerror(err)); + return; } f = qemu_fopen_socket(c, "rb"); if (f == NULL) { - fprintf(stderr, "could not qemu_fopen socket\n"); + error_report("could not qemu_fopen socket"); goto out; } diff --git a/migration-unix.c b/migration-unix.c index 651fc5b707..0a5f8a1332 100644 --- a/migration-unix.c +++ b/migration-unix.c @@ -13,7 +13,10 @@ * GNU GPL, version 2 or (at your option) any later version. */ +#include + #include "qemu-common.h" +#include "qemu/error-report.h" #include "qemu/sockets.h" #include "qemu/main-loop.h" #include "migration/migration.h" @@ -56,24 +59,26 @@ static void unix_accept_incoming_migration(void *opaque) socklen_t addrlen = sizeof(addr); int s = (intptr_t)opaque; QEMUFile *f; - int c; + int c, err; do { c = qemu_accept(s, (struct sockaddr *)&addr, &addrlen); - } while (c == -1 && errno == EINTR); + err = errno; + } while (c < 0 && err == EINTR); qemu_set_fd_handler2(s, NULL, NULL, NULL, NULL); close(s); DPRINTF("accepted migration\n"); - if (c == -1) { - fprintf(stderr, "could not accept migration connection\n"); - goto out; + if (c < 0) { + error_report("could not accept migration connection (%s)", + strerror(err)); + return; } f = qemu_fopen_socket(c, "rb"); if (f == NULL) { - fprintf(stderr, "could not qemu_fopen socket\n"); + error_report("could not qemu_fopen socket"); goto out; } From 21a246a43b606ee833f907d589d8dcbb54a2761e Mon Sep 17 00:00:00 2001 From: ChenLiang Date: Fri, 25 Apr 2014 17:06:20 +0800 Subject: [PATCH 32/36] migration: remove duplicate code version_id is checked twice in the ram_load. Signed-off-by: ChenLiang Signed-off-by: Gonglei Signed-off-by: Juan Quintela --- arch_init.c | 64 ++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/arch_init.c b/arch_init.c index 5e2022857a..15a706eb32 100644 --- a/arch_init.c +++ b/arch_init.c @@ -1010,7 +1010,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) seq_iter++; - if (version_id < 4 || version_id > 4) { + if (version_id != 4) { return -EINVAL; } @@ -1021,44 +1021,42 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) addr &= TARGET_PAGE_MASK; if (flags & RAM_SAVE_FLAG_MEM_SIZE) { - if (version_id == 4) { - /* Synchronize RAM block list */ - char id[256]; - ram_addr_t length; - ram_addr_t total_ram_bytes = addr; + /* Synchronize RAM block list */ + char id[256]; + ram_addr_t length; + ram_addr_t total_ram_bytes = addr; - while (total_ram_bytes) { - RAMBlock *block; - uint8_t len; + while (total_ram_bytes) { + RAMBlock *block; + uint8_t len; - len = qemu_get_byte(f); - qemu_get_buffer(f, (uint8_t *)id, len); - id[len] = 0; - length = qemu_get_be64(f); + len = qemu_get_byte(f); + qemu_get_buffer(f, (uint8_t *)id, len); + id[len] = 0; + length = qemu_get_be64(f); - QTAILQ_FOREACH(block, &ram_list.blocks, next) { - if (!strncmp(id, block->idstr, sizeof(id))) { - if (block->length != length) { - fprintf(stderr, - "Length mismatch: %s: " RAM_ADDR_FMT - " in != " RAM_ADDR_FMT "\n", id, length, - block->length); - ret = -EINVAL; - goto done; - } - break; + QTAILQ_FOREACH(block, &ram_list.blocks, next) { + if (!strncmp(id, block->idstr, sizeof(id))) { + if (block->length != length) { + fprintf(stderr, + "Length mismatch: %s: " RAM_ADDR_FMT + " in != " RAM_ADDR_FMT "\n", id, length, + block->length); + ret = -EINVAL; + goto done; } + break; } - - if (!block) { - fprintf(stderr, "Unknown ramblock \"%s\", cannot " - "accept migration\n", id); - ret = -EINVAL; - goto done; - } - - total_ram_bytes -= length; } + + if (!block) { + fprintf(stderr, "Unknown ramblock \"%s\", cannot " + "accept migration\n", id); + ret = -EINVAL; + goto done; + } + + total_ram_bytes -= length; } } From 1534ee93cc6be992c05577886b24bd44c37ecff6 Mon Sep 17 00:00:00 2001 From: ChenLiang Date: Fri, 4 Apr 2014 17:57:53 +0800 Subject: [PATCH 33/36] XBZRLE: Fix one XBZRLE corruption issues The page may not be inserted into cache after executing save_xbzrle_page. In case of failure to insert, the original page should be sent rather than the page in the cache. Signed-off-by: ChenLiang Signed-off-by: Gonglei Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- arch_init.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch_init.c b/arch_init.c index 15a706eb32..0ffecee7fb 100644 --- a/arch_init.c +++ b/arch_init.c @@ -341,7 +341,7 @@ static void xbzrle_cache_zero_page(ram_addr_t current_addr) #define ENCODING_FLAG_XBZRLE 0x1 -static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, +static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, ram_addr_t current_addr, RAMBlock *block, ram_addr_t offset, int cont, bool last_stage) { @@ -349,19 +349,23 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, uint8_t *prev_cached_page; if (!cache_is_cached(XBZRLE.cache, current_addr)) { + acct_info.xbzrle_cache_miss++; if (!last_stage) { - if (cache_insert(XBZRLE.cache, current_addr, current_data) == -1) { + if (cache_insert(XBZRLE.cache, current_addr, *current_data) == -1) { return -1; + } else { + /* update *current_data when the page has been + inserted into cache */ + *current_data = get_cached_data(XBZRLE.cache, current_addr); } } - acct_info.xbzrle_cache_miss++; return -1; } prev_cached_page = get_cached_data(XBZRLE.cache, current_addr); /* save current buffer into memory */ - memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE); + memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE); /* XBZRLE encoding (if there is no overflow) */ encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf, @@ -374,7 +378,10 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, DPRINTF("Overflow\n"); acct_info.xbzrle_overflows++; /* update data in the cache */ - memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE); + if (!last_stage) { + memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE); + *current_data = prev_cached_page; + } return -1; } @@ -599,15 +606,9 @@ static int ram_save_block(QEMUFile *f, bool last_stage) */ xbzrle_cache_zero_page(current_addr); } else if (!ram_bulk_stage && migrate_use_xbzrle()) { - bytes_sent = save_xbzrle_page(f, p, current_addr, block, + bytes_sent = save_xbzrle_page(f, &p, current_addr, block, offset, cont, last_stage); if (!last_stage) { - /* We must send exactly what's in the xbzrle cache - * even if the page wasn't xbzrle compressed, so that - * it's right next time. - */ - p = get_cached_data(XBZRLE.cache, current_addr); - /* Can't send this cached data async, since the cache page * might get updated before it gets to the wire */ From 71411d358000cf90ced348b1ce9142c13b5a93cd Mon Sep 17 00:00:00 2001 From: ChenLiang Date: Fri, 4 Apr 2014 17:57:54 +0800 Subject: [PATCH 34/36] migration: Add counts of updating the dirty bitmap Add counts to log the times of updating the dirty bitmap. Signed-off-by: ChenLiang Signed-off-by: Gonglei Reviewed-by: Eric Blake Signed-off-by: Juan Quintela --- arch_init.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch_init.c b/arch_init.c index 0ffecee7fb..c02bce65f6 100644 --- a/arch_init.c +++ b/arch_init.c @@ -111,6 +111,8 @@ static bool mig_throttle_on; static int dirty_rate_high_cnt; static void check_guest_throttling(void); +static uint64_t bitmap_sync_count; + /***********************************************************/ /* ram save/restore */ @@ -488,6 +490,8 @@ static void migration_bitmap_sync(void) int64_t end_time; int64_t bytes_xfer_now; + bitmap_sync_count++; + if (!bytes_xfer_prev) { bytes_xfer_prev = ram_bytes_transferred(); } @@ -732,6 +736,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) mig_throttle_on = false; dirty_rate_high_cnt = 0; + bitmap_sync_count = 0; if (migrate_use_xbzrle()) { XBZRLE_cache_lock(); From 58570ed894631904bcdbcd1e8b34479cebe2aae9 Mon Sep 17 00:00:00 2001 From: ChenLiang Date: Fri, 4 Apr 2014 17:57:55 +0800 Subject: [PATCH 35/36] migration: expose the bitmap_sync_count to the end expose the count that logs the times of updating the dirty bitmap to end user. Signed-off-by: ChenLiang Signed-off-by: Gonglei Reviewed-by: Eric Blake Signed-off-by: Juan Quintela --- arch_init.c | 1 + hmp.c | 2 ++ include/migration/migration.h | 1 + migration.c | 2 ++ qapi-schema.json | 4 +++- qmp-commands.hx | 13 +++++++++---- 6 files changed, 18 insertions(+), 5 deletions(-) diff --git a/arch_init.c b/arch_init.c index c02bce65f6..5f05e992df 100644 --- a/arch_init.c +++ b/arch_init.c @@ -537,6 +537,7 @@ static void migration_bitmap_sync(void) s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; start_time = end_time; num_dirty_pages_period = 0; + s->dirty_sync_count = bitmap_sync_count; } } diff --git a/hmp.c b/hmp.c index ca869bafa8..69a70e473b 100644 --- a/hmp.c +++ b/hmp.c @@ -188,6 +188,8 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->ram->normal); monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n", info->ram->normal_bytes >> 10); + monitor_printf(mon, "dirty sync count: %" PRIu64 "\n", + info->ram->dirty_sync_count); if (info->ram->dirty_pages_rate) { monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n", info->ram->dirty_pages_rate); diff --git a/include/migration/migration.h b/include/migration/migration.h index 31fbf174d2..8d88c7dbd2 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -61,6 +61,7 @@ struct MigrationState bool enabled_capabilities[MIGRATION_CAPABILITY_MAX]; int64_t xbzrle_cache_size; int64_t setup_time; + int64_t dirty_sync_count; }; void process_incoming_migration(QEMUFile *f); diff --git a/migration.c b/migration.c index ac232758b9..2cb768d9d0 100644 --- a/migration.c +++ b/migration.c @@ -215,6 +215,7 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->ram->normal_bytes = norm_mig_bytes_transferred(); info->ram->dirty_pages_rate = s->dirty_pages_rate; info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = s->dirty_sync_count; if (blk_mig_active()) { info->has_disk = true; @@ -248,6 +249,7 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->ram->normal = norm_mig_pages_transferred(); info->ram->normal_bytes = norm_mig_bytes_transferred(); info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = s->dirty_sync_count; break; case MIG_STATE_ERROR: info->has_status = true; diff --git a/qapi-schema.json b/qapi-schema.json index 0b00427c8c..7b950393b4 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -651,13 +651,15 @@ # # @mbps: throughput in megabits/sec. (since 1.6) # +# @dirty-sync-count: number of times that dirty ram was synchronized (since 2.1) +# # Since: 0.14.0 ## { 'type': 'MigrationStats', 'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' , 'duplicate': 'int', 'skipped': 'int', 'normal': 'int', 'normal-bytes': 'int', 'dirty-pages-rate' : 'int', - 'mbps' : 'number' } } + 'mbps' : 'number', 'dirty-sync-count' : 'int' } } ## # @XBZRLECacheStats diff --git a/qmp-commands.hx b/qmp-commands.hx index ed3ab9225b..aadcd04bb9 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2967,6 +2967,7 @@ The main json-object contains the following: pages. This is just normal pages times size of one page, but this way upper levels don't need to care about page size (json-int) + - "dirty-sync-count": times that dirty ram was synchronized (json-int) - "disk": only present if "status" is "active" and it is a block migration, it is a json-object with the following disk information: - "transferred": amount transferred in bytes (json-int) @@ -3004,7 +3005,8 @@ Examples: "downtime":12345, "duplicate":123, "normal":123, - "normal-bytes":123456 + "normal-bytes":123456, + "dirty-sync-count":15 } } } @@ -3029,7 +3031,8 @@ Examples: "expected-downtime":12345, "duplicate":123, "normal":123, - "normal-bytes":123456 + "normal-bytes":123456, + "dirty-sync-count":15 } } } @@ -3049,7 +3052,8 @@ Examples: "expected-downtime":12345, "duplicate":123, "normal":123, - "normal-bytes":123456 + "normal-bytes":123456, + "dirty-sync-count":15 }, "disk":{ "total":20971520, @@ -3075,7 +3079,8 @@ Examples: "expected-downtime":12345, "duplicate":10, "normal":3333, - "normal-bytes":3412992 + "normal-bytes":3412992, + "dirty-sync-count":15 }, "xbzrle-cache":{ "cache-size":67108864, From 8bc3923343e91902ca541112b3bdb5448f8d288e Mon Sep 17 00:00:00 2001 From: ChenLiang Date: Fri, 4 Apr 2014 17:57:56 +0800 Subject: [PATCH 36/36] migration: expose xbzrle cache miss rate expose xbzrle cache miss rate Signed-off-by: ChenLiang Signed-off-by: Gonglei Reviewed-by: Eric Blake Signed-off-by: Juan Quintela --- arch_init.c | 18 ++++++++++++++++++ hmp.c | 2 ++ include/migration/migration.h | 1 + migration.c | 1 + qapi-schema.json | 5 ++++- qmp-commands.hx | 2 ++ 6 files changed, 28 insertions(+), 1 deletion(-) diff --git a/arch_init.c b/arch_init.c index 5f05e992df..be743fd1d0 100644 --- a/arch_init.c +++ b/arch_init.c @@ -236,6 +236,7 @@ typedef struct AccountingInfo { uint64_t xbzrle_bytes; uint64_t xbzrle_pages; uint64_t xbzrle_cache_miss; + double xbzrle_cache_miss_rate; uint64_t xbzrle_overflows; } AccountingInfo; @@ -291,6 +292,11 @@ uint64_t xbzrle_mig_pages_cache_miss(void) return acct_info.xbzrle_cache_miss; } +double xbzrle_mig_cache_miss_rate(void) +{ + return acct_info.xbzrle_cache_miss_rate; +} + uint64_t xbzrle_mig_pages_overflow(void) { return acct_info.xbzrle_overflows; @@ -489,6 +495,8 @@ static void migration_bitmap_sync(void) static int64_t num_dirty_pages_period; int64_t end_time; int64_t bytes_xfer_now; + static uint64_t xbzrle_cache_miss_prev; + static uint64_t iterations_prev; bitmap_sync_count++; @@ -532,6 +540,16 @@ static void migration_bitmap_sync(void) } else { mig_throttle_on = false; } + if (migrate_use_xbzrle()) { + if (iterations_prev != 0) { + acct_info.xbzrle_cache_miss_rate = + (double)(acct_info.xbzrle_cache_miss - + xbzrle_cache_miss_prev) / + (acct_info.iterations - iterations_prev); + } + iterations_prev = acct_info.iterations; + xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss; + } s->dirty_pages_rate = num_dirty_pages_period * 1000 / (end_time - start_time); s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; diff --git a/hmp.c b/hmp.c index 69a70e473b..903e0a1dd7 100644 --- a/hmp.c +++ b/hmp.c @@ -214,6 +214,8 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->xbzrle_cache->pages); monitor_printf(mon, "xbzrle cache miss: %" PRIu64 "\n", info->xbzrle_cache->cache_miss); + monitor_printf(mon, "xbzrle cache miss rate: %0.2f\n", + info->xbzrle_cache->cache_miss_rate); monitor_printf(mon, "xbzrle overflow : %" PRIu64 "\n", info->xbzrle_cache->overflow); } diff --git a/include/migration/migration.h b/include/migration/migration.h index 8d88c7dbd2..3cb5ba80c3 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -124,6 +124,7 @@ uint64_t xbzrle_mig_bytes_transferred(void); uint64_t xbzrle_mig_pages_transferred(void); uint64_t xbzrle_mig_pages_overflow(void); uint64_t xbzrle_mig_pages_cache_miss(void); +double xbzrle_mig_cache_miss_rate(void); void ram_handle_compressed(void *host, uint8_t ch, uint64_t size); diff --git a/migration.c b/migration.c index 2cb768d9d0..52cda279af 100644 --- a/migration.c +++ b/migration.c @@ -174,6 +174,7 @@ static void get_xbzrle_cache_stats(MigrationInfo *info) info->xbzrle_cache->bytes = xbzrle_mig_bytes_transferred(); info->xbzrle_cache->pages = xbzrle_mig_pages_transferred(); info->xbzrle_cache->cache_miss = xbzrle_mig_pages_cache_miss(); + info->xbzrle_cache->cache_miss_rate = xbzrle_mig_cache_miss_rate(); info->xbzrle_cache->overflow = xbzrle_mig_pages_overflow(); } } diff --git a/qapi-schema.json b/qapi-schema.json index 7b950393b4..36cb964dfd 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -674,13 +674,16 @@ # # @cache-miss: number of cache miss # +# @cache-miss-rate: rate of cache miss (since 2.1) +# # @overflow: number of overflows # # Since: 1.2 ## { 'type': 'XBZRLECacheStats', 'data': {'cache-size': 'int', 'bytes': 'int', 'pages': 'int', - 'cache-miss': 'int', 'overflow': 'int' } } + 'cache-miss': 'int', 'cache-miss-rate': 'number', + 'overflow': 'int' } } ## # @MigrationInfo diff --git a/qmp-commands.hx b/qmp-commands.hx index aadcd04bb9..f437937e2c 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -2979,6 +2979,7 @@ The main json-object contains the following: - "bytes": number of bytes transferred for XBZRLE compressed pages - "pages": number of XBZRLE compressed pages - "cache-miss": number of XBRZRLE page cache misses + - "cache-miss-rate": rate of XBRZRLE page cache misses - "overflow": number of times XBZRLE overflows. This means that the XBZRLE encoding was bigger than just sent the whole page, and then we sent the whole page instead (as as @@ -3087,6 +3088,7 @@ Examples: "bytes":20971520, "pages":2444343, "cache-miss":2244, + "cache-miss-rate":0.123, "overflow":34434 } }