From 7145872ed373d75c4d5de40e55248a0840a15f70 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 18 Jun 2014 17:20:42 +0300 Subject: [PATCH 01/23] vhost: block migration if backend does not log memory vhost user does not support LOG_ALL feature bit. Generally, we should not try to set this bit without checking that backend can support it first. Detect and block migration. Signed-off-by: Michael S. Tsirkin Signed-off-by: Nikolay Nikolaev Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 11 +++++++++++ include/hw/virtio/vhost.h | 1 + 2 files changed, 12 insertions(+) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index c1b1aad6cf..84d382b853 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -20,6 +20,7 @@ #include #include "exec/address-spaces.h" #include "hw/virtio/virtio-bus.h" +#include "migration/migration.h" static void vhost_dev_sync_region(struct vhost_dev *dev, MemoryRegionSection *section, @@ -854,6 +855,12 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, .eventfd_del = vhost_eventfd_del, .priority = 10 }; + hdev->migration_blocker = NULL; + if (!(hdev->features & (0x1 << VHOST_F_LOG_ALL))) { + error_setg(&hdev->migration_blocker, + "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature."); + migrate_add_blocker(hdev->migration_blocker); + } hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions)); hdev->n_mem_sections = 0; hdev->mem_sections = NULL; @@ -882,6 +889,10 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) vhost_virtqueue_cleanup(hdev->vqs + i); } memory_listener_unregister(&hdev->memory_listener); + if (hdev->migration_blocker) { + migrate_del_blocker(hdev->migration_blocker); + error_free(hdev->migration_blocker); + } g_free(hdev->mem); g_free(hdev->mem_sections); hdev->vhost_ops->vhost_backend_cleanup(hdev); diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 33028ec8c2..d5593d1620 100644 --- a/include/hw/virtio/vhost.h +++ 
b/include/hw/virtio/vhost.h @@ -45,6 +45,7 @@ struct vhost_dev { bool log_enabled; vhost_log_chunk_t *log; unsigned long long log_size; + Error *migration_blocker; bool force; bool memory_changed; hwaddr mem_changed_start_addr; From 8617343faae6ba7e916137c6c9e3ef22c00565d8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 18 Jun 2014 18:55:22 +0300 Subject: [PATCH 02/23] vhost: fix resource leak in error handling vhost_verify_ring_mappings leaks mappings on error. Fix this up. Cc: qemu-stable@nongnu.org Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 84d382b853..e55fe1cc7e 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -305,7 +305,9 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, uint64_t size) { int i; - for (i = 0; i < dev->nvqs; ++i) { + int r = 0; + + for (i = 0; !r && i < dev->nvqs; ++i) { struct vhost_virtqueue *vq = dev->vqs + i; hwaddr l; void *p; @@ -317,15 +319,15 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, p = cpu_physical_memory_map(vq->ring_phys, &l, 1); if (!p || l != vq->ring_size) { fprintf(stderr, "Unable to map ring buffer for ring %d\n", i); - return -ENOMEM; + r = -ENOMEM; } if (p != vq->ring) { fprintf(stderr, "Ring buffer relocated for ring %d\n", i); - return -EBUSY; + r = -EBUSY; } cpu_physical_memory_unmap(p, l, 0, 0); } - return 0; + return r; } static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev, From 8f4e5ac3e27f832c09d5de274c84a7f22de0e4dd Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Thu, 19 Jun 2014 16:14:43 +0200 Subject: [PATCH 03/23] qapi/hmp: use 'backend' instead of 'device' with memory backend fixup documentation comments and HMP message/help text Signed-off-by: Igor Mammedov Reviewed-by: Eric Blake Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hmp.c | 2 +- monitor.c | 2 +- qapi-schema.json | 12 +++++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/hmp.c b/hmp.c index 41006f5eef..e183a79b8a 100644 --- a/hmp.c +++ b/hmp.c @@ -1692,7 +1692,7 @@ void hmp_info_memdev(Monitor *mon, const QDict *qdict) ov = string_output_visitor_new(false); visit_type_uint16List(string_output_get_visitor(ov), &m->value->host_nodes, NULL, NULL); - monitor_printf(mon, "memory device %d\n", i); + monitor_printf(mon, "memory backend: %d\n", i); monitor_printf(mon, " size: %" PRId64 "\n", m->value->size); monitor_printf(mon, " merge: %s\n", m->value->merge ? "true" : "false"); diff --git a/monitor.c b/monitor.c index c7f879713e..8c17f899b3 100644 --- a/monitor.c +++ b/monitor.c @@ -2968,7 +2968,7 @@ static mon_cmd_t info_cmds[] = { .name = "memdev", .args_type = "", .params = "", - .help = "show the memory device", + .help = "show memory backends", .mhandler.cmd = hmp_info_memdev, }, { diff --git a/qapi-schema.json b/qapi-schema.json index 98350048f6..f490403d81 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3158,19 +3158,19 @@ ## # @Memdev: # -# Information of memory device +# Information about memory backend # -# @size: memory device size +# @size: memory backend size # # @merge: enables or disables memory merge support # -# @dump: includes memory device's memory in a core dump or not +# @dump: includes memory backend's memory in a core dump or not # # @prealloc: enables or disables memory preallocation # # @host-nodes: host nodes for its memory policy # -# @policy: memory policy of memory device +# @policy: memory policy of memory backend # # Since: 2.1 ## @@ -3187,13 +3187,15 @@ ## # @query-memdev: # -# Returns information for all memory devices. +# Returns information for all memory backends. # # Returns: a list of @Memdev. 
# # Since: 2.1 ## { 'command': 'query-memdev', 'returns': ['Memdev'] } + +## # @PCDIMMDeviceInfo: # # PCDIMMDevice state information From 1dc75c6d74738cfc06dbcf4a2af6c09af692a640 Mon Sep 17 00:00:00 2001 From: Nikolay Nikolaev Date: Thu, 19 Jun 2014 18:07:40 +0300 Subject: [PATCH 04/23] libqemustub: add more stubs for qemu-char Additional stubs: - chr_baum_init - qemu_chr_open_spice_vmc - qemu_chr_open_spice_port Signed-off-by: Nikolay Nikolaev Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- stubs/Makefile.objs | 2 ++ stubs/chr-baum-init.c | 7 +++++++ stubs/qemu-chr-open-spice.c | 14 ++++++++++++++ 3 files changed, 23 insertions(+) create mode 100644 stubs/chr-baum-init.c create mode 100644 stubs/qemu-chr-open-spice.c diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index 997d68d5b9..03210ad560 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -1,5 +1,6 @@ stub-obj-y += arch-query-cpu-def.o stub-obj-y += bdrv-commit-all.o +stub-obj-y += chr-baum-init.o stub-obj-y += chr-msmouse.o stub-obj-y += clock-warp.o stub-obj-y += cpu-get-clock.o @@ -24,6 +25,7 @@ stub-obj-y += mon-set-error.o stub-obj-y += monitor-init.o stub-obj-y += notify-event.o stub-obj-y += pci-drive-hot-add.o +stub-obj-$(CONFIG_SPICE) += qemu-chr-open-spice.o stub-obj-y += qtest.o stub-obj-y += reset.o stub-obj-y += runstate-check.o diff --git a/stubs/chr-baum-init.c b/stubs/chr-baum-init.c new file mode 100644 index 0000000000..f5cc6ce1f8 --- /dev/null +++ b/stubs/chr-baum-init.c @@ -0,0 +1,7 @@ +#include "qemu-common.h" +#include "sysemu/char.h" + +CharDriverState *chr_baum_init(void) +{ + return NULL; +} diff --git a/stubs/qemu-chr-open-spice.c b/stubs/qemu-chr-open-spice.c new file mode 100644 index 0000000000..f1c4849d9c --- /dev/null +++ b/stubs/qemu-chr-open-spice.c @@ -0,0 +1,14 @@ +#include "qemu-common.h" +#include "ui/qemu-spice.h" + +CharDriverState *qemu_chr_open_spice_vmc(const char *type) +{ + return NULL; +} + +#if SPICE_SERVER_VERSION >= 
0x000c02 +CharDriverState *qemu_chr_open_spice_port(const char *name) +{ + return NULL; +} +#endif From bd95939fc804f3a6ffddbe761cdb21421808f156 Mon Sep 17 00:00:00 2001 From: Nikolay Nikolaev Date: Thu, 19 Jun 2014 18:07:59 +0300 Subject: [PATCH 05/23] qtest: fix qtest for vhost-user Fix compile for older glib, provide conditionally compiled versions of the used glib APIs. Signed-off-by: Nikolay Nikolaev Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/vhost-user-test.c | 128 +++++++++++++++++++++++++++++++++++----- 1 file changed, 113 insertions(+), 15 deletions(-) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 7c826b49e5..2934379d90 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -8,17 +8,30 @@ * */ +#define QEMU_GLIB_COMPAT_H +#include + #include "libqtest.h" #include "qemu/option.h" #include "sysemu/char.h" #include "sysemu/sysemu.h" -#include #include #include #include #include +/* GLIB version compatibility flags */ +#if GLIB_CHECK_VERSION(2, 28, 0) +#define HAVE_MONOTONIC_TIME +#endif + +#if GLIB_CHECK_VERSION(2, 32, 0) +#define HAVE_MUTEX_INIT +#define HAVE_COND_INIT +#define HAVE_THREAD_NEW +#endif + #define QEMU_CMD_ACCEL " -machine accel=tcg" #define QEMU_CMD_MEM " -m 512 -object memory-backend-file,id=mem,size=512M,"\ "mem-path=%s,share=on -numa node,memdev=mem" @@ -95,8 +108,93 @@ static VhostUserMsg m __attribute__ ((unused)); int fds_num = 0, fds[VHOST_MEMORY_MAX_NREGIONS]; static VhostUserMemory memory; -static GMutex data_mutex; -static GCond data_cond; +static GMutex *data_mutex; +static GCond *data_cond; + +static gint64 _get_time(void) +{ +#ifdef HAVE_MONOTONIC_TIME + return g_get_monotonic_time(); +#else + GTimeVal time; + g_get_current_time(&time); + + return time.tv_sec * G_TIME_SPAN_SECOND + time.tv_usec; +#endif +} + +static GMutex *_mutex_new(void) +{ + GMutex *mutex; + +#ifdef HAVE_MUTEX_INIT + mutex = g_new(GMutex, 1); + g_mutex_init(mutex); +#else + mutex = 
g_mutex_new(); +#endif + + return mutex; +} + +static void _mutex_free(GMutex *mutex) +{ +#ifdef HAVE_MUTEX_INIT + g_mutex_clear(mutex); + g_free(mutex); +#else + g_mutex_free(mutex); +#endif +} + +static GCond *_cond_new(void) +{ + GCond *cond; + +#ifdef HAVE_COND_INIT + cond = g_new(GCond, 1); + g_cond_init(cond); +#else + cond = g_cond_new(); +#endif + + return cond; +} + +static gboolean _cond_wait_until(GCond *cond, GMutex *mutex, gint64 end_time) +{ + gboolean ret = FALSE; +#ifdef HAVE_COND_INIT + ret = g_cond_wait_until(cond, mutex, end_time); +#else + GTimeVal time = { end_time / G_TIME_SPAN_SECOND, + end_time % G_TIME_SPAN_SECOND }; + ret = g_cond_timed_wait(cond, mutex, &time); +#endif + return ret; +} + +static void _cond_free(GCond *cond) +{ +#ifdef HAVE_COND_INIT + g_cond_clear(cond); + g_free(cond); +#else + g_cond_free(cond); +#endif +} + +static GThread *_thread_new(const gchar *name, GThreadFunc func, gpointer data) +{ + GThread *thread = NULL; + GError *error = NULL; +#ifdef HAVE_THREAD_NEW + thread = g_thread_try_new(name, func, data, &error); +#else + thread = g_thread_create(func, data, TRUE, &error); +#endif + return thread; +} static void read_guest_mem(void) { @@ -104,11 +202,11 @@ static void read_guest_mem(void) gint64 end_time; int i, j; - g_mutex_lock(&data_mutex); + g_mutex_lock(data_mutex); - end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; + end_time = _get_time() + 5 * G_TIME_SPAN_SECOND; while (!fds_num) { - if (!g_cond_wait_until(&data_cond, &data_mutex, end_time)) { + if (!_cond_wait_until(data_cond, data_mutex, end_time)) { /* timeout has passed */ g_assert(fds_num); break; @@ -143,7 +241,7 @@ static void read_guest_mem(void) } g_assert_cmpint(1, ==, 1); - g_mutex_unlock(&data_mutex); + g_mutex_unlock(data_mutex); } static void *thread_function(void *data) @@ -203,8 +301,8 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) fds_num = qemu_chr_fe_get_msgfds(chr, fds, sizeof(fds) / sizeof(int)); /* signal 
the test that it can continue */ - g_cond_signal(&data_cond); - g_mutex_unlock(&data_mutex); + g_cond_signal(data_cond); + g_mutex_unlock(data_mutex); break; case VHOST_USER_SET_VRING_KICK: @@ -285,10 +383,10 @@ int main(int argc, char **argv) qemu_chr_add_handlers(chr, chr_can_read, chr_read, NULL, chr); /* run the main loop thread so the chardev may operate */ - g_mutex_init(&data_mutex); - g_cond_init(&data_cond); - g_mutex_lock(&data_mutex); - g_thread_new(NULL, thread_function, NULL); + data_mutex = _mutex_new(); + data_cond = _cond_new(); + g_mutex_lock(data_mutex); + _thread_new(NULL, thread_function, NULL); qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path); s = qtest_start(qemu_cmd); @@ -305,8 +403,8 @@ int main(int argc, char **argv) /* cleanup */ unlink(socket_path); g_free(socket_path); - g_cond_clear(&data_cond); - g_mutex_clear(&data_mutex); + _cond_free(data_cond); + _mutex_free(data_mutex); return ret; } From f61badf32f1870e80e579f9dac363891a226e976 Mon Sep 17 00:00:00 2001 From: Nikolay Nikolaev Date: Thu, 19 Jun 2014 18:08:18 +0300 Subject: [PATCH 06/23] qtest: fix vhost-user-test unbalanced mutex locks Signed-off-by: Nikolay Nikolaev Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- tests/vhost-user-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 2934379d90..2af2381a1d 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -269,6 +269,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) return; } + g_mutex_lock(data_mutex); memcpy(p, buf, VHOST_USER_HDR_SIZE); if (msg.size) { @@ -302,7 +303,6 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) /* signal the test that it can continue */ g_cond_signal(data_cond); - g_mutex_unlock(data_mutex); break; case VHOST_USER_SET_VRING_KICK: @@ -319,6 +319,7 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) default: break; } + g_mutex_unlock(data_mutex); } static const char *init_hugepagefs(void) @@ -385,7 +386,6 @@ int main(int argc, char **argv) /* run the main loop thread so the chardev may operate */ data_mutex = _mutex_new(); data_cond = _cond_new(); - g_mutex_lock(data_mutex); _thread_new(NULL, thread_function, NULL); qemu_cmd = g_strdup_printf(QEMU_CMD, hugefs, socket_path); From 6a2acedb19221ddf5e6fd3fb3590ba636aa21007 Mon Sep 17 00:00:00 2001 From: "Gabriel L. Somlo" Date: Thu, 19 Jun 2014 11:55:33 -0400 Subject: [PATCH 07/23] e1000: emulate auto-negotiation during external link status change This patch emulates auto-negotiation when the network link status is modified externally (i.e. via "set_link off/on"). Also, a couple of cleanup items: - unset PHY status reg. AUTONEG_COMPLETE during link_down() - set PHY status reg. AUTONEG_COMPLETE during autoneg_timer() only if we actually brought the link up. - group all checks for "can we, and should we autonegotiate?" together for more clarity. Signed-off-by: Gabriel Somlo Reviewed-by: Alexander Graf Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/net/e1000.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 57bdffde08..9c6af069aa 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -175,6 +175,7 @@ e1000_link_down(E1000State *s) { s->mac_reg[STATUS] &= ~E1000_STATUS_LU; s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS; + s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE; } static void @@ -197,7 +198,6 @@ set_phy_ctrl(E1000State *s, int index, uint16_t val) } if ((val & MII_CR_AUTO_NEG_EN) && (val & MII_CR_RESTART_AUTO_NEG)) { e1000_link_down(s); - s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE; DBGOUT(PHY, "Start link auto negotiation\n"); timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); } @@ -209,9 +209,9 @@ e1000_autoneg_timer(void *opaque) E1000State *s = opaque; if (!qemu_get_queue(s->nic)->link_down) { e1000_link_up(s); + s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; + DBGOUT(PHY, "Auto negotiation is completed\n"); } - s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; - DBGOUT(PHY, "Auto negotiation is completed\n"); } static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = { @@ -853,7 +853,16 @@ e1000_set_link_status(NetClientState *nc) if (nc->link_down) { e1000_link_down(s); } else { - e1000_link_up(s); + if (s->compat_flags & E1000_FLAG_AUTONEG && + s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN && + s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG && + !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { + /* emulate auto-negotiation if supported */ + timer_mod(s->autoneg_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + } else { + e1000_link_up(s); + } } if (s->mac_reg[STATUS] != old_status) @@ -1279,16 +1288,13 @@ static void e1000_pre_save(void *opaque) e1000_mit_timer(s); } - if (!(s->compat_flags & E1000_FLAG_AUTONEG)) { - return; - } - /* - * If link is down and auto-negotiation is ongoing, complete - * auto-negotiation immediately. 
This allows is to look at - * MII_SR_AUTONEG_COMPLETE to infer link status on load. + * If link is down and auto-negotiation is supported and ongoing, + * complete auto-negotiation immediately. This allows us to look + * at MII_SR_AUTONEG_COMPLETE to infer link status on load. */ if (nc->link_down && + s->compat_flags & E1000_FLAG_AUTONEG && s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN && s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) { s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; @@ -1313,11 +1319,8 @@ static int e1000_post_load(void *opaque, int version_id) * Alternatively, restart link negotiation if it was in progress. */ nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0; - if (!(s->compat_flags & E1000_FLAG_AUTONEG)) { - return 0; - } - - if (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN && + if (s->compat_flags & E1000_FLAG_AUTONEG && + s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN && s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG && !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { nc->link_down = false; From 6883b5914029fa8ffc42a43d2a2188493c27fd58 Mon Sep 17 00:00:00 2001 From: "Gabriel L. Somlo" Date: Thu, 19 Jun 2014 11:55:34 -0400 Subject: [PATCH 08/23] e1000: improve auto-negotiation reporting via mii-tool Using mii-tool (on F20-live), the following output is produced: SIOCGMIIREG on ens3 failed: Input/output error ens3: no autonegotiation, 1000baseT-FD flow-control, link ok The first line (SIOCGMIIREG error) is due to mii-tool's inability to read the PHY auto-negotiation expansion register. On the second line, "no autonegotiation" is wrong, and caused by the absence of a flag in the link partner ability register which would indicate that our link partner has acked us. This flag is listed as "reserved" in the Intel e1000 manual, but mii-tool uses it as LPA_LPACK from /usr/include/linux/mii.h. 
This patch adds read access to PHY_AUTONEG_EXP and defines the link partner ack flag, allowing mii-tool to generate output as normally expected: ens3: negotiated 1000baseT-FD flow-control, link ok Signed-off-by: Gabriel Somlo Reviewed-by: Alexander Graf Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/e1000.c | 5 ++++- hw/net/e1000_regs.h | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 9c6af069aa..d6ef802ce9 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -176,6 +176,7 @@ e1000_link_down(E1000State *s) s->mac_reg[STATUS] &= ~E1000_STATUS_LU; s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS; s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE; + s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK; } static void @@ -209,6 +210,7 @@ e1000_autoneg_timer(void *opaque) E1000State *s = opaque; if (!qemu_get_queue(s->nic)->link_down) { e1000_link_up(s); + s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK; s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; DBGOUT(PHY, "Auto negotiation is completed\n"); } @@ -227,7 +229,8 @@ static const char phy_regcap[0x20] = { [PHY_CTRL] = PHY_RW, [PHY_1000T_CTRL] = PHY_RW, [PHY_LP_ABILITY] = PHY_R, [PHY_1000T_STATUS] = PHY_R, [PHY_AUTONEG_ADV] = PHY_RW, [M88E1000_RX_ERR_CNTR] = PHY_R, - [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R + [PHY_ID2] = PHY_R, [M88E1000_PHY_SPEC_STATUS] = PHY_R, + [PHY_AUTONEG_EXP] = PHY_R, }; /* PHY_ID2 documented in 8254x_GBe_SDM.pdf, pp. 
250 */ diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h index 13ac6713d4..60b96aaf13 100644 --- a/hw/net/e1000_regs.h +++ b/hw/net/e1000_regs.h @@ -384,6 +384,9 @@ #define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ #define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ +/* PHY Link Partner Ability Register */ +#define MII_LPAR_LPACK 0x4000 /* Acked by link partner */ + /* Interrupt Cause Read */ #define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ #define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ From 39bb8ee737595e9b264d075dfcd7d86f4d3f1133 Mon Sep 17 00:00:00 2001 From: "Gabriel L. Somlo" Date: Thu, 19 Jun 2014 11:55:36 -0400 Subject: [PATCH 09/23] e1000: signal guest on successful link auto-negotiation Generate a link status change interrupt once link auto-negotiation is successfully completed. This does not affect Linux and Windows (XP and 7 tested) in any way, but is needed by the stock OS X driver (AppleIntel8254XEthernet.kext), which would otherwise fail to notice the link status change event. Signed-off-by: Gabriel Somlo Reviewed-by: Alexander Graf Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/e1000.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/e1000.c b/hw/net/e1000.c index d6ef802ce9..d20f8c8ff2 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -213,6 +213,7 @@ e1000_autoneg_timer(void *opaque) s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK; s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; DBGOUT(PHY, "Auto negotiation is completed\n"); + set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */ } } From d52aec95453838f2957c0cbf6ef79c51d161e87f Mon Sep 17 00:00:00 2001 From: "Gabriel L. Somlo" Date: Thu, 19 Jun 2014 11:55:35 -0400 Subject: [PATCH 10/23] e1000: move e1000_autoneg_timer() to after set_ics() Enable calling set_ics() from within e1000_autoneg_timer() without the need for a forward declaration.
This patch contains no functional changes. Signed-off-by: Gabriel Somlo Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/e1000.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/hw/net/e1000.c b/hw/net/e1000.c index d20f8c8ff2..8ee5225062 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -204,19 +204,6 @@ set_phy_ctrl(E1000State *s, int index, uint16_t val) } } -static void -e1000_autoneg_timer(void *opaque) -{ - E1000State *s = opaque; - if (!qemu_get_queue(s->nic)->link_down) { - e1000_link_up(s); - s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK; - s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; - DBGOUT(PHY, "Auto negotiation is completed\n"); - set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */ - } -} - static void (*phyreg_writeops[])(E1000State *, int, uint16_t) = { [PHY_CTRL] = set_phy_ctrl, }; @@ -348,6 +335,19 @@ set_ics(E1000State *s, int index, uint32_t val) set_interrupt_cause(s, 0, val | s->mac_reg[ICR]); } +static void +e1000_autoneg_timer(void *opaque) +{ + E1000State *s = opaque; + if (!qemu_get_queue(s->nic)->link_down) { + e1000_link_up(s); + s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK; + s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; + DBGOUT(PHY, "Auto negotiation is completed\n"); + set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */ + } +} + static int rxbufsize(uint32_t v) { From d7a4155265416a1c8f3067b59e68bf5fda1d6215 Mon Sep 17 00:00:00 2001 From: "Gabriel L. Somlo" Date: Thu, 19 Jun 2014 15:40:51 -0400 Subject: [PATCH 11/23] e1000: factor out checking for auto-negotiation availability Also fix minor indentation issues in the surrounding code. Suggested-by: Michael S. Tsirkin Signed-off-by: Gabriel Somlo Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/net/e1000.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 8ee5225062..0fc29a0ae3 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -848,6 +848,14 @@ receive_filter(E1000State *s, const uint8_t *buf, int size) return 0; } +static bool +have_autoneg(E1000State *s) +{ + return (s->compat_flags & E1000_FLAG_AUTONEG) && + (s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN) && + (s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG); +} + static void e1000_set_link_status(NetClientState *nc) { @@ -857,9 +865,7 @@ e1000_set_link_status(NetClientState *nc) if (nc->link_down) { e1000_link_down(s); } else { - if (s->compat_flags & E1000_FLAG_AUTONEG && - s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN && - s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG && + if (have_autoneg(s) && !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { /* emulate auto-negotiation if supported */ timer_mod(s->autoneg_timer, @@ -1297,11 +1303,8 @@ static void e1000_pre_save(void *opaque) * complete auto-negotiation immediately. This allows us to look * at MII_SR_AUTONEG_COMPLETE to infer link status on load. */ - if (nc->link_down && - s->compat_flags & E1000_FLAG_AUTONEG && - s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN && - s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG) { - s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; + if (nc->link_down && have_autoneg(s)) { + s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; } } @@ -1323,12 +1326,11 @@ static int e1000_post_load(void *opaque, int version_id) * Alternatively, restart link negotiation if it was in progress. 
*/ nc->link_down = (s->mac_reg[STATUS] & E1000_STATUS_LU) == 0; - if (s->compat_flags & E1000_FLAG_AUTONEG && - s->phy_reg[PHY_CTRL] & MII_CR_AUTO_NEG_EN && - s->phy_reg[PHY_CTRL] & MII_CR_RESTART_AUTO_NEG && + if (have_autoneg(s) && !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { nc->link_down = false; - timer_mod(s->autoneg_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + timer_mod(s->autoneg_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); } return 0; From 684531ad1f69feb1288b9813c3eb47fba992ea96 Mon Sep 17 00:00:00 2001 From: Hu Tao Date: Fri, 20 Jun 2014 13:55:42 +0800 Subject: [PATCH 12/23] qapi/string-output-visitor: fix human output "0x1-0x10" looks better than "0x1-10" Signed-off-by: Hu Tao Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Eric Blake --- qapi/string-output-visitor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c index e9aca3bfdc..1ab8574585 100644 --- a/qapi/string-output-visitor.c +++ b/qapi/string-output-visitor.c @@ -98,7 +98,7 @@ static void format_string(StringOutputVisitor *sov, Range *r, bool next, { if (r->end - r->begin > 1) { if (human) { - g_string_append_printf(sov->string, "0x%" PRIx64 "-%" PRIx64, + g_string_append_printf(sov->string, "0x%" PRIx64 "-0x%" PRIx64, r->begin, r->end - 1); } else { From 4f8586144161d5e680fdef3e09b7e8e9111c2929 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Sun, 22 Jun 2014 10:38:36 +0800 Subject: [PATCH 13/23] qemu-char: fix qemu_chr_fe_get_msgfd() Commit c76bf6bb8fbbb233a7d3641e09229d23747d5ee3 ("Add chardev API qemu_chr_fe_get_msgfds") broke qemu_chr_fe_get_msgfd() because it changed the return value. Callers expect -1 if no fd is available. The commit changed the return value to 0 (which is a valid file descriptor number) so callers always detected a file descriptor even if none was available. 
This patch fixes qemu-iotests 045: $ cd tests/qemu-iotests && ./check 045 [...] +FAIL: test_add_fd_invalid_fd (__main__.TestFdSets) +---------------------------------------------------------------------- +Traceback (most recent call last): + File "./045", line 123, in test_add_fd_invalid_fd + self.assert_qmp(result, 'error/class', 'GenericError') + File "/home/stefanha/qemu/tests/qemu-iotests/iotests.py", line 232, in assert_qmp + result = self.dictpath(d, path) + File "/home/stefanha/qemu/tests/qemu-iotests/iotests.py", line 211, in dictpath + self.fail('failed path traversal for "%s" in "%s"' % (path, str(d))) +AssertionError: failed path traversal for "error/class" in "{u'return': {u'fdset-id': 2, u'fd': 0}}" Cc: Nikolay Nikolaev Signed-off-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qemu-char.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu-char.c b/qemu-char.c index e4eb985b57..d9100a2201 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -204,7 +204,7 @@ void qemu_chr_be_write(CharDriverState *s, uint8_t *buf, int len) int qemu_chr_fe_get_msgfd(CharDriverState *s) { int fd; - return (qemu_chr_fe_get_msgfds(s, &fd, 1) >= 0) ? fd : -1; + return (qemu_chr_fe_get_msgfds(s, &fd, 1) == 1) ? fd : -1; } int qemu_chr_fe_get_msgfds(CharDriverState *s, int *fds, int len) From d2fc39b4208709db95b6825c0e1b00ce6fbf0ecc Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Sun, 22 Jun 2014 10:38:37 +0800 Subject: [PATCH 14/23] qemu-char: avoid leaking unused fds in tcp_get_msgfds() Commit c76bf6bb8fbbb233a7d3641e09229d23747d5ee3 ("Add chardev API qemu_chr_fe_get_msgfds") extended the get_msgfds API from one to multiple file descriptors. It forgot to close unused file descriptors before freeing the file descriptor array. This patch prevents a file descriptor leak if the tcp_get_msgfds() callers requests fewer file descriptors than are available. 
Cc: Nikolay Nikolaev Signed-off-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- qemu-char.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/qemu-char.c b/qemu-char.c index d9100a2201..e6cbafb09c 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -2481,8 +2481,15 @@ static int tcp_get_msgfds(CharDriverState *chr, int *fds, int num) int to_copy = (s->read_msgfds_num < num) ? s->read_msgfds_num : num; if (to_copy) { + int i; + memcpy(fds, s->read_msgfds, to_copy * sizeof(int)); + /* Close unused fds */ + for (i = to_copy; i < s->read_msgfds_num; i++) { + close(s->read_msgfds[i]); + } + g_free(s->read_msgfds); s->read_msgfds = 0; s->read_msgfds_num = 0; From c7ff54825b74f27c3aac85aad540542801630d0a Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Mon, 23 Jun 2014 17:06:25 +0800 Subject: [PATCH 15/23] virtio-pci: Report an error when msix vectors init fails Currently vectors are silently cleared to 0 if the initialization fails, but the user should have at least one way to notice this. Signed-off-by: Fam Zheng Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index ce97514b69..57e1e6141e 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -976,6 +976,8 @@ static void virtio_pci_device_plugged(DeviceState *d) if (proxy->nvectors && msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, 1)) { + error_report("unable to init msix vectors to %" PRIu32, + proxy->nvectors); proxy->nvectors = 0; } From 48cb7f3c1526b4632bd63d945cac80d26616d6f5 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Fri, 13 Jun 2014 00:28:32 -0300 Subject: [PATCH 16/23] q35: Use PC_Q35_COMPAT_1_4 on pc-q35-1.4 compat_props pc-q35-1.4 was incorrectly using PC_COMPAT_1_4 instead of PC_Q35_COMPAT_1_4. The only side-effect was that the hpet compat property (inherited from PC_Q35_COMPAT_1_7) was missing.
Without this patch, pc-q35-1.4 incorrectly initializes hpet-intcap to 0xff0104 (behavior introduced in QEMU 2.0, by commit 7a10ef51c2397ac4323bc786af02c58b413b5cd2). Signed-off-by: Eduardo Habkost Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Markus Armbruster --- hw/i386/pc_q35.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index aa71332ee1..3fe3abef91 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -388,7 +388,7 @@ static QEMUMachine pc_q35_machine_v1_4 = { .name = "pc-q35-1.4", .init = pc_q35_init_1_4, .compat_props = (GlobalProperty[]) { - PC_COMPAT_1_4, + PC_Q35_COMPAT_1_4, { /* end of list */ } }, }; From e4bcd27c86d20d6f1bc06a34e6612c18534a4968 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Mon, 23 Jun 2014 17:32:47 +0300 Subject: [PATCH 17/23] hw/pcie: correct debug message Trivial issue, discovered while debugging. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 02cde6f96c..ae92f00f4a 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -224,7 +224,7 @@ static void pcie_cap_slot_hotplug_common(PCIDevice *hotplug_dev, *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap; uint16_t sltsta = pci_get_word(*exp_cap + PCI_EXP_SLTSTA); - PCIE_DEV_PRINTF(PCI_DEVICE(dev), "hotplug state: %d\n", state); + PCIE_DEV_PRINTF(PCI_DEVICE(dev), "hotplug state: 0x%x\n", sltsta); if (sltsta & PCI_EXP_SLTSTA_EIS) { /* the slot is electromechanically locked. * This error is propagated up to qdev and then to HMP/QMP.
From f23b6bdc3c30c77ba0dffaa6de5e398dc3c49c51 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Mon, 23 Jun 2014 17:32:48 +0300 Subject: [PATCH 18/23] hw/pcie: implement power controller functionality It is needed by hot-unplug in order to get an indication from the OS when the device can be physically detached. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/ioh3420.c | 7 +++++++ hw/pci-bridge/xio3130_downstream.c | 7 +++++++ hw/pci/pcie.c | 33 +++++++++++++++++++++++++++++- include/hw/i386/pc.h | 10 ++++++++- include/hw/pci/pci.h | 3 +++ include/hw/pci/pcie.h | 2 ++ include/hw/pci/pcie_regs.h | 2 ++ 7 files changed, 62 insertions(+), 2 deletions(-) diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c index f4e17ac41a..7cd87fcbb4 100644 --- a/hw/pci-bridge/ioh3420.c +++ b/hw/pci-bridge/ioh3420.c @@ -180,6 +180,12 @@ PCIESlot *ioh3420_init(PCIBus *bus, int devfn, bool multifunction, return PCIE_SLOT(d); } +static Property ioh3420_props[] = { + DEFINE_PROP_BIT(COMPAT_PROP_PCP, PCIDevice, cap_present, + QEMU_PCIE_SLTCAP_PCP_BITNR, true), + DEFINE_PROP_END_OF_LIST() +}; + static const VMStateDescription vmstate_ioh3420 = { .name = "ioh-3240-express-root-port", .version_id = 1, @@ -210,6 +216,7 @@ static void ioh3420_class_init(ObjectClass *klass, void *data) dc->desc = "Intel IOH device id 3420 PCIE Root Port"; dc->reset = ioh3420_reset; dc->vmsd = &vmstate_ioh3420; + dc->props = ioh3420_props; } static const TypeInfo ioh3420_info = { diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c index 8f22f93f8e..51f20d7467 100644 --- a/hw/pci-bridge/xio3130_downstream.c +++ b/hw/pci-bridge/xio3130_downstream.c @@ -147,6 +147,12 @@ PCIESlot *xio3130_downstream_init(PCIBus *bus, int devfn, bool multifunction, return PCIE_SLOT(d); } +static Property xio3130_downstream_props[] = { + DEFINE_PROP_BIT(COMPAT_PROP_PCP, PCIDevice, cap_present, + QEMU_PCIE_SLTCAP_PCP_BITNR, 
true), + DEFINE_PROP_END_OF_LIST() +}; + static const VMStateDescription vmstate_xio3130_downstream = { .name = "xio3130-express-downstream-port", .version_id = 1, @@ -177,6 +183,7 @@ static void xio3130_downstream_class_init(ObjectClass *klass, void *data) dc->desc = "TI X3130 Downstream Port of PCI Express Switch"; dc->reset = xio3130_downstream_reset; dc->vmsd = &vmstate_xio3130_downstream; + dc->props = xio3130_downstream_props; } static const TypeInfo xio3130_downstream_info = { diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index ae92f00f4a..d6d9eb83ad 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -294,6 +294,15 @@ void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot) PCI_EXP_SLTCAP_AIP | PCI_EXP_SLTCAP_ABP); + if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) { + pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, + PCI_EXP_SLTCAP_PCP); + pci_word_test_and_clear_mask(dev->config + pos + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC); + pci_word_test_and_set_mask(dev->wmask + pos + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC); + } + pci_word_test_and_clear_mask(dev->config + pos + PCI_EXP_SLTCTL, PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC); @@ -327,6 +336,10 @@ void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot) void pcie_cap_slot_reset(PCIDevice *dev) { uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + uint8_t port_type = pcie_cap_get_type(dev); + + assert(port_type == PCI_EXP_TYPE_DOWNSTREAM || + port_type == PCI_EXP_TYPE_ROOT_PORT); PCIE_DEV_PRINTF(dev, "reset\n"); @@ -339,9 +352,27 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE); pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, - PCI_EXP_SLTCTL_PIC_OFF | PCI_EXP_SLTCTL_AIC_OFF); + if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) { + bool populated; + uint16_t pic; + + /* Downstream ports enforce device number 0. 
*/ + populated = (pci_bridge_get_sec_bus(PCI_BRIDGE(dev))->devices[0] != NULL); + + if (populated) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC); + } else { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, + PCI_EXP_SLTCTL_PCC); + } + + pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF; + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic); + } + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_EIS |/* on reset, the lock is released */ diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 19f78ea336..651971d1cc 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -297,8 +297,16 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); .driver = "ICH9-LPC",\ .property = "memory-hotplug-support",\ .value = "off",\ + },{\ + .driver = "xio3130-downstream",\ + .property = COMPAT_PROP_PCP,\ + .value = "off",\ + },{\ + .driver = "ioh3420",\ + .property = COMPAT_PROP_PCP,\ + .value = "off",\ } - + #define PC_Q35_COMPAT_1_7 \ PC_COMPAT_1_7, \ PC_Q35_COMPAT_2_0, \ diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 8c25ae5d1d..c352c7b3ad 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -158,6 +158,9 @@ enum { QEMU_PCI_CAP_SHPC = (1 << QEMU_PCI_SHPC_BITNR), #define QEMU_PCI_SLOTID_BITNR 6 QEMU_PCI_CAP_SLOTID = (1 << QEMU_PCI_SLOTID_BITNR), + /* PCI Express capability - Power Controller Present */ +#define QEMU_PCIE_SLTCAP_PCP_BITNR 7 + QEMU_PCIE_SLTCAP_PCP = (1 << QEMU_PCIE_SLTCAP_PCP_BITNR), }; #define TYPE_PCI_DEVICE "pci-device" diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index b0bf7e3ce1..7fe81f31ef 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -76,6 +76,8 @@ struct PCIExpressDevice { PCIEAERLog aer_log; }; +#define COMPAT_PROP_PCP "power_controller_present" + /* PCI express capability helper functions */ int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port); int 
pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset); diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h index 4d123d9fcc..652d9fc58c 100644 --- a/include/hw/pci/pcie_regs.h +++ b/include/hw/pci/pcie_regs.h @@ -57,6 +57,8 @@ #define PCI_EXP_SLTCTL_PIC_SHIFT (ffs(PCI_EXP_SLTCTL_PIC) - 1) #define PCI_EXP_SLTCTL_PIC_OFF \ (PCI_EXP_SLTCTL_IND_OFF << PCI_EXP_SLTCTL_PIC_SHIFT) +#define PCI_EXP_SLTCTL_PIC_ON \ + (PCI_EXP_SLTCTL_IND_ON << PCI_EXP_SLTCTL_PIC_SHIFT) #define PCI_EXP_SLTCTL_SUPPORTED \ (PCI_EXP_SLTCTL_ABPE | \ From 554f802da3f8b09b16b9a84ad5847b2eb0e9ad2b Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Mon, 23 Jun 2014 17:32:49 +0300 Subject: [PATCH 19/23] hw/pcie: better hotplug/hotunplug support The current code is broken: it does surprise removal which crashes guests. Reimplemented the steps: - Hotplug triggers both 'present detect change' and 'attention button pressed'. - Hotunplug starts by triggering 'attention button pressed', then waits for the OS to power off the device and only then detaches it. Fixes CVE-2014-3471. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci/pcie.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index d6d9eb83ad..da32589393 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -258,7 +258,8 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS); - pcie_cap_slot_event(PCI_DEVICE(hotplug_dev), PCI_EXP_HP_EV_PDC); + pcie_cap_slot_event(PCI_DEVICE(hotplug_dev), + PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); } void pcie_cap_slot_hot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -268,10 +269,7 @@ void pcie_cap_slot_hot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); - object_unparent(OBJECT(dev)); - pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); - pcie_cap_slot_event(PCI_DEVICE(hotplug_dev), PCI_EXP_HP_EV_PDC); + pcie_cap_slot_push_attention_button(PCI_DEVICE(hotplug_dev)); } /* pci express slot for pci express root/downstream port @@ -383,6 +381,11 @@ void pcie_cap_slot_reset(PCIDevice *dev) hotplug_event_update_event_status(dev); } +static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + object_unparent(OBJECT(dev)); +} + void pcie_cap_slot_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) { @@ -407,6 +410,22 @@ void pcie_cap_slot_write_config(PCIDevice *dev, sltsta); } + /* + * If the slot is polulated, power indicator is off and power + * controller is off, it is safe to detach the devices. 
+ */ + if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) && + ((val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF)) { + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + pci_for_each_device(sec_bus, pci_bus_num(sec_bus), + pcie_unplug_device, NULL); + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDC); + } + hotplug_event_notify(dev); /* From 20de98aff5471f4c849f456d2f9716c748a1c05c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 23 Jun 2014 17:36:55 +0300 Subject: [PATCH 20/23] pcie: coding style tweak - whitespace fix - unnecessary != 0 in a condition Cc: Marcel Apfelbaum Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index da32589393..a123c01ef1 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -353,11 +353,9 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTCTL_AIC_OFF); if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) { - bool populated; - uint16_t pic; - /* Downstream ports enforce device number 0. */ - populated = (pci_bridge_get_sec_bus(PCI_BRIDGE(dev))->devices[0] != NULL); + bool populated = pci_bridge_get_sec_bus(PCI_BRIDGE(dev))->devices[0]; + uint16_t pic; if (populated) { pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTCTL, @@ -369,7 +367,7 @@ void pcie_cap_slot_reset(PCIDevice *dev) pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF; pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic); - } + } pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_EIS |/* on reset, From 3c2a96699e9fc09b5712dacfe200cdaaff0bb55c Mon Sep 17 00:00:00 2001 From: Don Slutz Date: Thu, 19 Jun 2014 21:40:24 -0400 Subject: [PATCH 21/23] xen-hvm: Fix xen_hvm_init() to adjust pc memory layout This is just below_4g_mem_size and above_4g_mem_size which is used later in QEMU. 
Acked-by: Stefano Stabellini Signed-off-by: Don Slutz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 31 ++++++++++++++++--------------- hw/i386/pc_q35.c | 29 +++++++++++++++-------------- include/hw/xen/xen.h | 3 ++- xen-hvm-stub.c | 3 ++- xen-hvm.c | 24 ++++++++++++++---------- 5 files changed, 49 insertions(+), 41 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 3e7524b961..60057f9de4 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -99,21 +99,6 @@ static void pc_init1(MachineState *machine, FWCfgState *fw_cfg = NULL; PcGuestInfo *guest_info; - if (xen_enabled() && xen_hvm_init(&ram_memory) != 0) { - fprintf(stderr, "xen hardware virtual machine initialisation failed\n"); - exit(1); - } - - icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); - object_property_add_child(qdev_get_machine(), "icc-bridge", - OBJECT(icc_bridge), NULL); - - pc_cpus_init(machine->cpu_model, icc_bridge); - - if (kvm_enabled() && kvmclock_enabled) { - kvmclock_create(); - } - /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory). * If it doesn't, we need to split it in chunks below and above 4G. 
* In any case, try to make sure that guest addresses aligned at @@ -130,6 +115,22 @@ static void pc_init1(MachineState *machine, below_4g_mem_size = machine->ram_size; } + if (xen_enabled() && xen_hvm_init(&below_4g_mem_size, &above_4g_mem_size, + &ram_memory) != 0) { + fprintf(stderr, "xen hardware virtual machine initialisation failed\n"); + exit(1); + } + + icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); + object_property_add_child(qdev_get_machine(), "icc-bridge", + OBJECT(icc_bridge), NULL); + + pc_cpus_init(machine->cpu_model, icc_bridge); + + if (kvm_enabled() && kvmclock_enabled) { + kvmclock_create(); + } + if (pci_enabled) { pci_memory = g_new(MemoryRegion, 1); memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 3fe3abef91..da5fd53304 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -86,20 +86,6 @@ static void pc_q35_init(MachineState *machine) DeviceState *icc_bridge; PcGuestInfo *guest_info; - if (xen_enabled() && xen_hvm_init(&ram_memory) != 0) { - fprintf(stderr, "xen hardware virtual machine initialisation failed\n"); - exit(1); - } - - icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); - object_property_add_child(qdev_get_machine(), "icc-bridge", - OBJECT(icc_bridge), NULL); - - pc_cpus_init(machine->cpu_model, icc_bridge); - pc_acpi_init("q35-acpi-dsdt.aml"); - - kvmclock_create(); - /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping * also known as MMCFG). 
@@ -118,6 +104,21 @@ static void pc_q35_init(MachineState *machine) below_4g_mem_size = machine->ram_size; } + if (xen_enabled() && xen_hvm_init(&below_4g_mem_size, &above_4g_mem_size, + &ram_memory) != 0) { + fprintf(stderr, "xen hardware virtual machine initialisation failed\n"); + exit(1); + } + + icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); + object_property_add_child(qdev_get_machine(), "icc-bridge", + OBJECT(icc_bridge), NULL); + + pc_cpus_init(machine->cpu_model, icc_bridge); + pc_acpi_init("q35-acpi-dsdt.aml"); + + kvmclock_create(); + /* pci enabled */ if (pci_enabled) { pci_memory = g_new(MemoryRegion, 1); diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h index 85fda3dee4..f71f2d8963 100644 --- a/include/hw/xen/xen.h +++ b/include/hw/xen/xen.h @@ -37,10 +37,11 @@ void xen_cmos_set_s3_resume(void *opaque, int irq, int level); qemu_irq *xen_interrupt_controller_init(void); int xen_init(MachineClass *mc); -int xen_hvm_init(MemoryRegion **ram_memory); void xenstore_store_pv_console_info(int i, struct CharDriverState *chr); #if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY) +int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, + MemoryRegion **ram_memory); void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, struct MemoryRegion *mr); void xen_modified_memory(ram_addr_t start, ram_addr_t length); diff --git a/xen-hvm-stub.c b/xen-hvm-stub.c index 4eb27b5f2b..2d98696e72 100644 --- a/xen-hvm-stub.c +++ b/xen-hvm-stub.c @@ -51,7 +51,8 @@ void xen_modified_memory(ram_addr_t start, ram_addr_t length) { } -int xen_hvm_init(MemoryRegion **ram_memory) +int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, + MemoryRegion **ram_memory) { return 0; } diff --git a/xen-hvm.c b/xen-hvm.c index aac38efb09..5bcebddf76 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -155,10 +155,11 @@ qemu_irq *xen_interrupt_controller_init(void) /* Memory Ops */ -static void xen_ram_init(ram_addr_t ram_size, MemoryRegion 
**ram_memory_p) +static void xen_ram_init(ram_addr_t *below_4g_mem_size, + ram_addr_t *above_4g_mem_size, + ram_addr_t ram_size, MemoryRegion **ram_memory_p) { MemoryRegion *sysmem = get_system_memory(); - ram_addr_t below_4g_mem_size, above_4g_mem_size = 0; ram_addr_t block_len; block_len = ram_size; @@ -173,10 +174,11 @@ static void xen_ram_init(ram_addr_t ram_size, MemoryRegion **ram_memory_p) vmstate_register_ram_global(&ram_memory); if (ram_size >= HVM_BELOW_4G_RAM_END) { - above_4g_mem_size = ram_size - HVM_BELOW_4G_RAM_END; - below_4g_mem_size = HVM_BELOW_4G_RAM_END; + *above_4g_mem_size = ram_size - HVM_BELOW_4G_RAM_END; + *below_4g_mem_size = HVM_BELOW_4G_RAM_END; } else { - below_4g_mem_size = ram_size; + *above_4g_mem_size = 0; + *below_4g_mem_size = ram_size; } memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k", @@ -189,12 +191,13 @@ static void xen_ram_init(ram_addr_t ram_size, MemoryRegion **ram_memory_p) * the Options ROM, so it is registered here as RAM. */ memory_region_init_alias(&ram_lo, NULL, "xen.ram.lo", - &ram_memory, 0xc0000, below_4g_mem_size - 0xc0000); + &ram_memory, 0xc0000, + *below_4g_mem_size - 0xc0000); memory_region_add_subregion(sysmem, 0xc0000, &ram_lo); - if (above_4g_mem_size > 0) { + if (*above_4g_mem_size > 0) { memory_region_init_alias(&ram_hi, NULL, "xen.ram.hi", &ram_memory, 0x100000000ULL, - above_4g_mem_size); + *above_4g_mem_size); memory_region_add_subregion(sysmem, 0x100000000ULL, &ram_hi); } } @@ -958,7 +961,8 @@ static void xen_wakeup_notifier(Notifier *notifier, void *data) xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0); } -int xen_hvm_init(MemoryRegion **ram_memory) +int xen_hvm_init(ram_addr_t *below_4g_mem_size, ram_addr_t *above_4g_mem_size, + MemoryRegion **ram_memory) { int i, rc; unsigned long ioreq_pfn; @@ -1036,7 +1040,7 @@ int xen_hvm_init(MemoryRegion **ram_memory) /* Init RAM management */ xen_map_cache_init(xen_phys_offset_to_gaddr, state); - xen_ram_init(ram_size, ram_memory); + 
xen_ram_init(below_4g_mem_size, above_4g_mem_size, ram_size, ram_memory); qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state); From c87b1520726f7ae1e698a41f07043d1b539ac88c Mon Sep 17 00:00:00 2001 From: Don Slutz Date: Thu, 19 Jun 2014 21:40:25 -0400 Subject: [PATCH 22/23] pc & q35: Add new machine opt max-ram-below-4g This is a pc & q35 only machine opt. If you add enough PCI devices then all mmio for them will not fit below 4G which may not be the layout the user wanted. This allows you to increase the below 4G address space that PCI devices can use (aka decrease ram below 4G) and therefore in more cases not have any mmio that is above 4G. For example using "-machine pc,max-ram-below-4g=2G" on the command line will limit the amount of ram that is below 4G to 2G. Note: this machine option cannot be used to increase the amount of ram below 4G. Signed-off-by: Don Slutz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin MST: fix 32 bit --- hw/i386/pc.c | 47 ++++++++++++++++++++++++++++++++++++++++++++ hw/i386/pc_piix.c | 22 ++++++++++++++++++++- hw/i386/pc_q35.c | 22 ++++++++++++++++++++- include/hw/i386/pc.h | 3 +++ vl.c | 4 ++++ 5 files changed, 96 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 67eb45089e..2cf22b1293 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1643,11 +1643,58 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque, visit_type_int(v, &value, name, errp); } +static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v, + void *opaque, const char *name, + Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + uint64_t value = pcms->max_ram_below_4g; + + visit_type_size(v, &value, name, errp); +} + +static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v, + void *opaque, const char *name, + Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + Error *error = NULL; + uint64_t value; + + visit_type_size(v, &value, name, 
&error); + if (error) { + error_propagate(errp, error); + return; + } + if (value > (1ULL << 32)) { + error_set(&error, ERROR_CLASS_GENERIC_ERROR, + "Machine option 'max-ram-below-4g=%"PRIu64 + "' expects size less than or equal to 4G", value); + error_propagate(errp, error); + return; + } + + if (value < (1ULL << 20)) { + error_report("Warning: small max_ram_below_4g(%"PRIu64 + ") less than 1M. BIOS may not work..", + value); + } + + pcms->max_ram_below_4g = value; +} + static void pc_machine_initfn(Object *obj) { + PCMachineState *pcms = PC_MACHINE(obj); + object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int", pc_machine_get_hotplug_memory_region_size, NULL, NULL, NULL, NULL); + pcms->max_ram_below_4g = 1ULL << 32; /* 4G */ + object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size", + pc_machine_get_max_ram_below_4g, + pc_machine_set_max_ram_below_4g, + NULL, NULL, NULL); } static void pc_machine_class_init(ObjectClass *oc, void *data) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 60057f9de4..47546b72ae 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -48,6 +48,7 @@ #include "exec/address-spaces.h" #include "hw/acpi/acpi.h" #include "cpu.h" +#include "qemu/error-report.h" #ifdef CONFIG_XEN # include #endif @@ -98,6 +99,7 @@ static void pc_init1(MachineState *machine, DeviceState *icc_bridge; FWCfgState *fw_cfg = NULL; PcGuestInfo *guest_info; + ram_addr_t lowmem; /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory). * If it doesn't, we need to split it in chunks below and above 4G. @@ -107,7 +109,25 @@ static void pc_init1(MachineState *machine, * breaking migration. */ if (machine->ram_size >= 0xe0000000) { - ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000; + lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000; + } else { + lowmem = 0xe0000000; + } + + /* Handle the machine opt max-ram-below-4g. It is basicly doing + * min(qemu limit, user limit). 
+ */ + if (lowmem > pc_machine->max_ram_below_4g) { + lowmem = pc_machine->max_ram_below_4g; + if (machine->ram_size - lowmem > lowmem && + lowmem & ((1ULL << 30) - 1)) { + error_report("Warning: Large machine and max_ram_below_4g(%"PRIu64 + ") not a multiple of 1G; possible bad performance.", + pc_machine->max_ram_below_4g); + } + } + + if (machine->ram_size >= lowmem) { above_4g_mem_size = machine->ram_size - lowmem; below_4g_mem_size = lowmem; } else { diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index da5fd53304..155db99f63 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -44,6 +44,7 @@ #include "hw/ide/ahci.h" #include "hw/usb.h" #include "hw/cpu/icc_bus.h" +#include "qemu/error-report.h" /* ICH9 AHCI has 6 ports */ #define MAX_SATA_PORTS 6 @@ -85,6 +86,7 @@ static void pc_q35_init(MachineState *machine) PCIDevice *ahci; DeviceState *icc_bridge; PcGuestInfo *guest_info; + ram_addr_t lowmem; /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping @@ -96,7 +98,25 @@ static void pc_q35_init(MachineState *machine) * breaking migration. */ if (machine->ram_size >= 0xb0000000) { - ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000; + lowmem = gigabyte_align ? 0x80000000 : 0xb0000000; + } else { + lowmem = 0xb0000000; + } + + /* Handle the machine opt max-ram-below-4g. It is basicly doing + * min(qemu limit, user limit). 
+ */ + if (lowmem > pc_machine->max_ram_below_4g) { + lowmem = pc_machine->max_ram_below_4g; + if (machine->ram_size - lowmem > lowmem && + lowmem & ((1ULL << 30) - 1)) { + error_report("Warning: Large machine and max_ram_below_4g(%"PRIu64 + ") not a multiple of 1G; possible bad performance.", + pc_machine->max_ram_below_4g); + } + } + + if (machine->ram_size >= lowmem) { above_4g_mem_size = machine->ram_size - lowmem; below_4g_mem_size = lowmem; } else { diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 651971d1cc..486e98feac 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -33,10 +33,13 @@ struct PCMachineState { MemoryRegion hotplug_memory; HotplugHandler *acpi_dev; + + uint64_t max_ram_below_4g; }; #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size" +#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g" /** * PCMachineClass: diff --git a/vl.c b/vl.c index ab8f15243b..194c6f79a3 100644 --- a/vl.c +++ b/vl.c @@ -381,6 +381,10 @@ static QemuOptsList qemu_machine_opts = { .name = "kvm-type", .type = QEMU_OPT_STRING, .help = "Specifies the KVM virtualization mode (HV, PR)", + },{ + .name = PC_MACHINE_MAX_RAM_BELOW_4G, + .type = QEMU_OPT_SIZE, + .help = "maximum ram below the 4G boundary (32bit boundary)", }, { /* End of list */ } }, From c4f5cdc53f181f6fe84a0f1bf99914598934a8a6 Mon Sep 17 00:00:00 2001 From: Don Slutz Date: Thu, 19 Jun 2014 21:40:26 -0400 Subject: [PATCH 23/23] xen-hvm: Handle machine opt max-ram-below-4g This is the xen part of "pc & q35: Add new machine opt max-ram-below-4g" Note: this machine option cannot be used to increase the amount of ram below 4G. Signed-off-by: Don Slutz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- xen-hvm.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/xen-hvm.c b/xen-hvm.c index 5bcebddf76..bafdf1283f 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -161,25 +161,36 @@ static void xen_ram_init(ram_addr_t *below_4g_mem_size, { MemoryRegion *sysmem = get_system_memory(); ram_addr_t block_len; + uint64_t user_lowmem = object_property_get_int(qdev_get_machine(), + PC_MACHINE_MAX_RAM_BELOW_4G, + &error_abort); - block_len = ram_size; - if (ram_size >= HVM_BELOW_4G_RAM_END) { - /* Xen does not allocate the memory continuously, and keep a hole at - * HVM_BELOW_4G_MMIO_START of HVM_BELOW_4G_MMIO_LENGTH - */ - block_len += HVM_BELOW_4G_MMIO_LENGTH; + /* Handle the machine opt max-ram-below-4g. It is basicly doing + * min(xen limit, user limit). + */ + if (HVM_BELOW_4G_RAM_END <= user_lowmem) { + user_lowmem = HVM_BELOW_4G_RAM_END; } - memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len); - *ram_memory_p = &ram_memory; - vmstate_register_ram_global(&ram_memory); - if (ram_size >= HVM_BELOW_4G_RAM_END) { - *above_4g_mem_size = ram_size - HVM_BELOW_4G_RAM_END; - *below_4g_mem_size = HVM_BELOW_4G_RAM_END; + if (ram_size >= user_lowmem) { + *above_4g_mem_size = ram_size - user_lowmem; + *below_4g_mem_size = user_lowmem; } else { *above_4g_mem_size = 0; *below_4g_mem_size = ram_size; } + if (!*above_4g_mem_size) { + block_len = ram_size; + } else { + /* + * Xen does not allocate the memory continuously, it keeps a + * hole of the size computed above or passed in. + */ + block_len = (1ULL << 32) + *above_4g_mem_size; + } + memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len); + *ram_memory_p = &ram_memory; + vmstate_register_ram_global(&ram_memory); memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k", &ram_memory, 0, 0xa0000);