From 2f2b18f60bf17453b4c01197a9316615a3c1f1de Mon Sep 17 00:00:00 2001 From: Zheng Xiang Date: Mon, 3 Dec 2018 15:05:17 +0800 Subject: [PATCH 01/44] pcie: set link state inactive/active after hot unplug/plug When VM boots from the latest version of linux kernel, after hot-unpluging virtio-blk disks which are hotplugged into pcie-root-port, the VM's dmesg log shows: [ 151.046242] pciehp 0000:00:05.0:pcie004: pending interrupts 0x0001 from Slot Status [ 151.046365] pciehp 0000:00:05.0:pcie004: Slot(0-3): Attention button pressed [ 151.046369] pciehp 0000:00:05.0:pcie004: Slot(0-3): Powering off due to button press [ 151.046420] pciehp 0000:00:05.0:pcie004: pending interrupts 0x0010 from Slot Status [ 151.046425] pciehp 0000:00:05.0:pcie004: pciehp_green_led_blink: SLOTCTRL a8 write cmd 200 [ 151.046464] pciehp 0000:00:05.0:pcie004: pending interrupts 0x0010 from Slot Status [ 151.046468] pciehp 0000:00:05.0:pcie004: pciehp_set_attention_status: SLOTCTRL a8 write cmd c0 [ 156.163421] pciehp 0000:00:05.0:pcie004: pciehp_get_power_status: SLOTCTRL a8 value read 2f1 [ 156.163427] pciehp 0000:00:05.0:pcie004: pciehp_unconfigure_device: domain:bus:dev = 0000:06:00 [ 156.198736] pciehp 0000:00:05.0:pcie004: pending interrupts 0x0010 from Slot Status [ 156.198772] pciehp 0000:00:05.0:pcie004: pciehp_power_off_slot: SLOTCTRL a8 write cmd 400 [ 157.224124] pciehp 0000:00:05.0:pcie004: pending interrupts 0x0018 from Slot Status [ 157.224194] pciehp 0000:00:05.0:pcie004: pciehp_green_led_off: SLOTCTRL a8 write cmd 300 [ 157.224220] pciehp 0000:00:05.0:pcie004: pciehp_check_link_active: lnk_status = 2011 [ 157.224223] pciehp 0000:00:05.0:pcie004: Slot(0-3): Link Up [ 157.224233] pciehp 0000:00:05.0:pcie004: pciehp_get_power_status: SLOTCTRL a8 value read 7f1 [ 157.224281] pciehp 0000:00:05.0:pcie004: pending interrupts 0x0010 from Slot Status [ 157.224285] pciehp 0000:00:05.0:pcie004: pciehp_power_on_slot: SLOTCTRL a8 write cmd 0 [ 157.224300] pciehp 0000:00:05.0:pcie004: __pciehp_link_set: lnk_ctrl = 0 [ 157.224336] pciehp 0000:00:05.0:pcie004: pending interrupts 0x0010 from Slot Status [ 157.224339] pciehp 0000:00:05.0:pcie004: pciehp_green_led_blink: SLOTCTRL a8 write cmd 200 [ 159.739294] pci 0000:06:00.0 id reading try 50 times with interval 20 ms to get ffffffff [ 159.739315] pciehp 0000:00:05.0:pcie004: pciehp_check_link_status: lnk_status = 2011 [ 159.739318] pciehp 0000:00:05.0:pcie004: Failed to check link status [ 159.739371] pciehp 0000:00:05.0:pcie004: pending interrupts 0x0010 from Slot Status [ 159.739394] pciehp 0000:00:05.0:pcie004: pciehp_power_off_slot: SLOTCTRL a8 write cmd 400 [ 160.771426] pciehp 0000:00:05.0:pcie004: pending interrupts 0x0010 from Slot Status [ 160.771452] pciehp 0000:00:05.0:pcie004: pciehp_green_led_off: SLOTCTRL a8 write cmd 300 [ 160.771495] pciehp 0000:00:05.0:pcie004: pending interrupts 0x0010 from Slot Status [ 160.771499] pciehp 0000:00:05.0:pcie004: pciehp_set_attention_status: SLOTCTRL a8 write cmd 40 [ 160.771535] pciehp 0000:00:05.0:pcie004: pending interrupts 0x0010 from Slot Status [ 160.771539] pciehp 0000:00:05.0:pcie004: pciehp_green_led_off: SLOTCTRL a8 write cmd 300 After analyzing the log information, it seems that qemu doesn't change the Link Status from active to inactive after hot-unplug. This results in the abnormal log after the linux kernel commit d331710ea78fea merged. Furthermore, If I hotplug the same virtio-blk disk after hot-unplug, the virtio-blk would turn on and then back off. So this patch set the Link Status inactive after hot-unplug and active after hot-plug. Signed-off-by: Zheng Xiang Signed-off-by: Zheng Xiang Cc: Wang Haibin Cc: qemu-stable@nongnu.org Reviewed-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 6c91bd44a0..66b73b87c8 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -345,6 +345,10 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, if (!dev->hotplugged) { pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS); + if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } return; } @@ -355,6 +359,10 @@ void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, if (pci_get_function_0(pci_dev)) { pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS); + if (pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } pcie_cap_slot_event(PCI_DEVICE(hotplug_dev), PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); } @@ -531,6 +539,10 @@ void pcie_cap_slot_write_config(PCIDevice *dev, pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS); + if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + } pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC); } From 2b4e573c7c7b9a698ba6931ba456bbd8d3d8c84c Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 26 Nov 2018 12:28:44 -0600 Subject: [PATCH 02/44] pc:piix4: Update smbus I/O space after a migration Otherwise it won't be set up correctly and won't work after miigration. Signed-off-by: Corey Minyard Cc: Igor Mammedov Cc: qemu-stable@nongnu.org Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/piix4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index e330f24c71..2f4dd03b83 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -173,6 +173,7 @@ static int vmstate_acpi_post_load(void *opaque, int version_id) PIIX4PMState *s = opaque; pm_io_space_update(s); + smbus_io_space_update(s); return 0; } From a4ee4c8baa37154f42b4dc6a13fee79268d15238 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 5 Dec 2018 17:57:03 -0200 Subject: [PATCH 03/44] virtio: Helper for registering virtio device types Introduce a helper for registering different flavours of virtio devices. Convert code to use the helper, but keep only the existing generic types. Transitional and non-transitional device types will be added by another patch. Acked-by: Andrea Bolognani Reviewed-by: Cornelia Huck Signed-off-by: Eduardo Habkost Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/display/virtio-gpu-pci.c | 7 +- hw/display/virtio-vga.c | 7 +- hw/virtio/virtio-crypto-pci.c | 7 +- hw/virtio/virtio-pci.c | 235 ++++++++++++++++++++++++---------- hw/virtio/virtio-pci.h | 54 ++++++++ 5 files changed, 230 insertions(+), 80 deletions(-) diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c index cece4aa495..faf76a8bc4 100644 --- a/hw/display/virtio-gpu-pci.c +++ b/hw/display/virtio-gpu-pci.c @@ -69,9 +69,8 @@ static void virtio_gpu_initfn(Object *obj) TYPE_VIRTIO_GPU); } -static const TypeInfo virtio_gpu_pci_info = { - .name = TYPE_VIRTIO_GPU_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_gpu_pci_info = { + .generic_name = TYPE_VIRTIO_GPU_PCI, .instance_size = sizeof(VirtIOGPUPCI), .instance_init = virtio_gpu_initfn, .class_init = virtio_gpu_pci_class_init, @@ -79,6 +78,6 @@ static const TypeInfo virtio_gpu_pci_info = { static void virtio_gpu_pci_register_types(void) { - type_register_static(&virtio_gpu_pci_info); + virtio_pci_types_register(&virtio_gpu_pci_info); } type_init(virtio_gpu_pci_register_types) diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index ab2e369b28..8db4d916f2 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -207,9 +207,8 @@ static void virtio_vga_inst_initfn(Object *obj) TYPE_VIRTIO_GPU); } -static TypeInfo virtio_vga_info = { - .name = TYPE_VIRTIO_VGA, - .parent = TYPE_VIRTIO_PCI, +static VirtioPCIDeviceTypeInfo virtio_vga_info = { + .generic_name = TYPE_VIRTIO_VGA, .instance_size = sizeof(struct VirtIOVGA), .instance_init = virtio_vga_inst_initfn, .class_init = virtio_vga_class_init, @@ -217,7 +216,7 @@ static TypeInfo virtio_vga_info = { static void virtio_vga_register_types(void) { - type_register_static(&virtio_vga_info); + virtio_pci_types_register(&virtio_vga_info); } type_init(virtio_vga_register_types) diff --git a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c index bf64996e48..8cc3fa3ef7 100644 --- a/hw/virtio/virtio-crypto-pci.c +++ b/hw/virtio/virtio-crypto-pci.c @@ -64,9 +64,8 @@ static void virtio_crypto_initfn(Object *obj) TYPE_VIRTIO_CRYPTO); } -static const TypeInfo virtio_crypto_pci_info = { - .name = TYPE_VIRTIO_CRYPTO_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_crypto_pci_info = { + .generic_name = TYPE_VIRTIO_CRYPTO_PCI, .instance_size = sizeof(VirtIOCryptoPCI), .instance_init = virtio_crypto_initfn, .class_init = virtio_crypto_pci_class_init, @@ -74,6 +73,6 @@ static const TypeInfo virtio_crypto_pci_info = { static void virtio_crypto_pci_register_types(void) { - type_register_static(&virtio_crypto_pci_info); + virtio_pci_types_register(&virtio_crypto_pci_info); } type_init(virtio_crypto_pci_register_types) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index a954799267..f07ec55c38 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1119,9 +1119,8 @@ static void virtio_9p_pci_instance_init(Object *obj) TYPE_VIRTIO_9P); } -static const TypeInfo virtio_9p_pci_info = { - .name = TYPE_VIRTIO_9P_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_9p_pci_info = { + .generic_name = TYPE_VIRTIO_9P_PCI, .instance_size = sizeof(V9fsPCIState), .instance_init = virtio_9p_pci_instance_init, .class_init = virtio_9p_pci_class_init, @@ -1877,9 +1876,6 @@ static void virtio_pci_reset(DeviceState *qdev) static Property virtio_pci_properties[] = { DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false), - DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy, - ON_OFF_AUTO_AUTO), - DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false), DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, @@ -1939,13 +1935,123 @@ static const TypeInfo virtio_pci_info = { .class_init = virtio_pci_class_init, .class_size = sizeof(VirtioPCIClass), .abstract = true, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_PCIE_DEVICE }, - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { } - }, }; +static Property virtio_pci_generic_properties[] = { + DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_pci_base_class_init(ObjectClass *klass, void *data) +{ + const VirtioPCIDeviceTypeInfo *t = data; + if (t->class_init) { + t->class_init(klass, NULL); + } +} + +static void virtio_pci_generic_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = virtio_pci_generic_properties; +} + +/* Used when the generic type and the base type is the same */ +static void virtio_pci_generic_base_class_init(ObjectClass *klass, void *data) +{ + virtio_pci_base_class_init(klass, data); + virtio_pci_generic_class_init(klass, NULL); +} + +static void virtio_pci_transitional_instance_init(Object *obj) +{ + VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); + + proxy->disable_legacy = ON_OFF_AUTO_OFF; + proxy->disable_modern = false; +} + +static void virtio_pci_non_transitional_instance_init(Object *obj) +{ + VirtIOPCIProxy *proxy = VIRTIO_PCI(obj); + + proxy->disable_legacy = ON_OFF_AUTO_ON; + proxy->disable_modern = false; +} + +void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) +{ + TypeInfo base_type_info = { + .name = t->base_name, + .parent = t->parent ? t->parent : TYPE_VIRTIO_PCI, + .instance_size = t->instance_size, + .instance_init = t->instance_init, + .class_init = virtio_pci_base_class_init, + .class_data = (void *)t, + .abstract = true, + }; + TypeInfo generic_type_info = { + .name = t->generic_name, + .parent = base_type_info.name, + .class_init = virtio_pci_generic_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { } + }, + }; + + if (!base_type_info.name) { + /* No base type -> register a single generic device type */ + base_type_info.name = t->generic_name; + base_type_info.class_init = virtio_pci_generic_base_class_init; + base_type_info.interfaces = generic_type_info.interfaces; + base_type_info.abstract = false; + generic_type_info.name = NULL; + assert(!t->non_transitional_name); + assert(!t->transitional_name); + } + + type_register(&base_type_info); + if (generic_type_info.name) { + type_register(&generic_type_info); + } + + if (t->non_transitional_name) { + const TypeInfo non_transitional_type_info = { + .name = t->non_transitional_name, + .parent = base_type_info.name, + .instance_init = virtio_pci_non_transitional_instance_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { } + }, + }; + type_register(&non_transitional_type_info); + } + + if (t->transitional_name) { + const TypeInfo transitional_type_info = { + .name = t->transitional_name, + .parent = base_type_info.name, + .instance_init = virtio_pci_transitional_instance_init, + .interfaces = (InterfaceInfo[]) { + /* + * Transitional virtio devices work only as Conventional PCI + * devices because they require PIO ports. + */ + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { } + }, + }; + type_register(&transitional_type_info); + } +} + /* virtio-blk-pci */ static Property virtio_blk_pci_properties[] = { @@ -1995,9 +2101,8 @@ static void virtio_blk_pci_instance_init(Object *obj) "bootindex", &error_abort); } -static const TypeInfo virtio_blk_pci_info = { - .name = TYPE_VIRTIO_BLK_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = { + .generic_name = TYPE_VIRTIO_BLK_PCI, .instance_size = sizeof(VirtIOBlkPCI), .instance_init = virtio_blk_pci_instance_init, .class_init = virtio_blk_pci_class_init, @@ -2051,9 +2156,8 @@ static void vhost_user_blk_pci_instance_init(Object *obj) "bootindex", &error_abort); } -static const TypeInfo vhost_user_blk_pci_info = { - .name = TYPE_VHOST_USER_BLK_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo vhost_user_blk_pci_info = { + .generic_name = TYPE_VHOST_USER_BLK_PCI, .instance_size = sizeof(VHostUserBlkPCI), .instance_init = vhost_user_blk_pci_instance_init, .class_init = vhost_user_blk_pci_class_init, @@ -2119,9 +2223,8 @@ static void virtio_scsi_pci_instance_init(Object *obj) TYPE_VIRTIO_SCSI); } -static const TypeInfo virtio_scsi_pci_info = { - .name = TYPE_VIRTIO_SCSI_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_scsi_pci_info = { + .generic_name = TYPE_VIRTIO_SCSI_PCI, .instance_size = sizeof(VirtIOSCSIPCI), .instance_init = virtio_scsi_pci_instance_init, .class_init = virtio_scsi_pci_class_init, @@ -2174,9 +2277,8 @@ static void vhost_scsi_pci_instance_init(Object *obj) "bootindex", &error_abort); } -static const TypeInfo vhost_scsi_pci_info = { - .name = TYPE_VHOST_SCSI_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo vhost_scsi_pci_info = { + .generic_name = TYPE_VHOST_SCSI_PCI, .instance_size = sizeof(VHostSCSIPCI), .instance_init = vhost_scsi_pci_instance_init, .class_init = vhost_scsi_pci_class_init, @@ -2229,9 +2331,8 @@ static void vhost_user_scsi_pci_instance_init(Object *obj) "bootindex", &error_abort); } -static const TypeInfo vhost_user_scsi_pci_info = { - .name = TYPE_VHOST_USER_SCSI_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo vhost_user_scsi_pci_info = { + .generic_name = TYPE_VHOST_USER_SCSI_PCI, .instance_size = sizeof(VHostUserSCSIPCI), .instance_init = vhost_user_scsi_pci_instance_init, .class_init = vhost_user_scsi_pci_class_init, @@ -2277,9 +2378,8 @@ static void vhost_vsock_pci_instance_init(Object *obj) TYPE_VHOST_VSOCK); } -static const TypeInfo vhost_vsock_pci_info = { - .name = TYPE_VHOST_VSOCK_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = { + .generic_name = TYPE_VHOST_VSOCK_PCI, .instance_size = sizeof(VHostVSockPCI), .instance_init = vhost_vsock_pci_instance_init, .class_init = vhost_vsock_pci_class_init, @@ -2334,9 +2434,8 @@ static void virtio_balloon_pci_instance_init(Object *obj) "guest-stats-polling-interval", &error_abort); } -static const TypeInfo virtio_balloon_pci_info = { - .name = TYPE_VIRTIO_BALLOON_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_balloon_pci_info = { + .generic_name = TYPE_VIRTIO_BALLOON_PCI, .instance_size = sizeof(VirtIOBalloonPCI), .instance_init = virtio_balloon_pci_instance_init, .class_init = virtio_balloon_pci_class_init, @@ -2407,9 +2506,8 @@ static void virtio_serial_pci_instance_init(Object *obj) TYPE_VIRTIO_SERIAL); } -static const TypeInfo virtio_serial_pci_info = { - .name = TYPE_VIRTIO_SERIAL_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_serial_pci_info = { + .generic_name = TYPE_VIRTIO_SERIAL_PCI, .instance_size = sizeof(VirtIOSerialPCI), .instance_init = virtio_serial_pci_instance_init, .class_init = virtio_serial_pci_class_init, @@ -2462,9 +2560,8 @@ static void virtio_net_pci_instance_init(Object *obj) "bootindex", &error_abort); } -static const TypeInfo virtio_net_pci_info = { - .name = TYPE_VIRTIO_NET_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_net_pci_info = { + .generic_name = TYPE_VIRTIO_NET_PCI, .instance_size = sizeof(VirtIONetPCI), .instance_init = virtio_net_pci_instance_init, .class_init = virtio_net_pci_class_init, @@ -2513,9 +2610,8 @@ static void virtio_rng_initfn(Object *obj) TYPE_VIRTIO_RNG); } -static const TypeInfo virtio_rng_pci_info = { - .name = TYPE_VIRTIO_RNG_PCI, - .parent = TYPE_VIRTIO_PCI, +static const VirtioPCIDeviceTypeInfo virtio_rng_pci_info = { + .generic_name = TYPE_VIRTIO_RNG_PCI, .instance_size = sizeof(VirtIORngPCI), .instance_init = virtio_rng_initfn, .class_init = virtio_rng_pci_class_init, @@ -2605,24 +2701,24 @@ static const TypeInfo virtio_input_hid_pci_info = { .abstract = true, }; -static const TypeInfo virtio_keyboard_pci_info = { - .name = TYPE_VIRTIO_KEYBOARD_PCI, +static const VirtioPCIDeviceTypeInfo virtio_keyboard_pci_info = { + .generic_name = TYPE_VIRTIO_KEYBOARD_PCI, .parent = TYPE_VIRTIO_INPUT_HID_PCI, .class_init = virtio_input_hid_kbd_pci_class_init, .instance_size = sizeof(VirtIOInputHIDPCI), .instance_init = virtio_keyboard_initfn, }; -static const TypeInfo virtio_mouse_pci_info = { - .name = TYPE_VIRTIO_MOUSE_PCI, +static const VirtioPCIDeviceTypeInfo virtio_mouse_pci_info = { + .generic_name = TYPE_VIRTIO_MOUSE_PCI, .parent = TYPE_VIRTIO_INPUT_HID_PCI, .class_init = virtio_input_hid_mouse_pci_class_init, .instance_size = sizeof(VirtIOInputHIDPCI), .instance_init = virtio_mouse_initfn, }; -static const TypeInfo virtio_tablet_pci_info = { - .name = TYPE_VIRTIO_TABLET_PCI, +static const VirtioPCIDeviceTypeInfo virtio_tablet_pci_info = { + .generic_name = TYPE_VIRTIO_TABLET_PCI, .parent = TYPE_VIRTIO_INPUT_HID_PCI, .instance_size = sizeof(VirtIOInputHIDPCI), .instance_init = virtio_tablet_initfn, @@ -2637,8 +2733,8 @@ static void virtio_host_initfn(Object *obj) TYPE_VIRTIO_INPUT_HOST); } -static const TypeInfo virtio_host_pci_info = { - .name = TYPE_VIRTIO_INPUT_HOST_PCI, +static const VirtioPCIDeviceTypeInfo virtio_host_pci_info = { + .generic_name = TYPE_VIRTIO_INPUT_HOST_PCI, .parent = TYPE_VIRTIO_INPUT_PCI, .instance_size = sizeof(VirtIOInputHostPCI), .instance_init = virtio_host_initfn, @@ -2692,36 +2788,39 @@ static const TypeInfo virtio_pci_bus_info = { static void virtio_pci_register_types(void) { - type_register_static(&virtio_rng_pci_info); - type_register_static(&virtio_input_pci_info); - type_register_static(&virtio_input_hid_pci_info); - type_register_static(&virtio_keyboard_pci_info); - type_register_static(&virtio_mouse_pci_info); - type_register_static(&virtio_tablet_pci_info); -#ifdef CONFIG_LINUX - type_register_static(&virtio_host_pci_info); -#endif + /* Base types: */ type_register_static(&virtio_pci_bus_info); type_register_static(&virtio_pci_info); + type_register_static(&virtio_input_pci_info); + type_register_static(&virtio_input_hid_pci_info); + + /* Implementations: */ + virtio_pci_types_register(&virtio_rng_pci_info); + virtio_pci_types_register(&virtio_keyboard_pci_info); + virtio_pci_types_register(&virtio_mouse_pci_info); + virtio_pci_types_register(&virtio_tablet_pci_info); +#ifdef CONFIG_LINUX + virtio_pci_types_register(&virtio_host_pci_info); +#endif #ifdef CONFIG_VIRTFS - type_register_static(&virtio_9p_pci_info); + virtio_pci_types_register(&virtio_9p_pci_info); #endif - type_register_static(&virtio_blk_pci_info); + virtio_pci_types_register(&virtio_blk_pci_info); #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) - type_register_static(&vhost_user_blk_pci_info); + virtio_pci_types_register(&vhost_user_blk_pci_info); #endif - type_register_static(&virtio_scsi_pci_info); - type_register_static(&virtio_balloon_pci_info); - type_register_static(&virtio_serial_pci_info); - type_register_static(&virtio_net_pci_info); + virtio_pci_types_register(&virtio_scsi_pci_info); + virtio_pci_types_register(&virtio_balloon_pci_info); + virtio_pci_types_register(&virtio_serial_pci_info); + virtio_pci_types_register(&virtio_net_pci_info); #ifdef CONFIG_VHOST_SCSI - type_register_static(&vhost_scsi_pci_info); + virtio_pci_types_register(&vhost_scsi_pci_info); #endif #if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) - type_register_static(&vhost_user_scsi_pci_info); + virtio_pci_types_register(&vhost_user_scsi_pci_info); #endif #ifdef CONFIG_VHOST_VSOCK - type_register_static(&vhost_vsock_pci_info); + virtio_pci_types_register(&vhost_vsock_pci_info); #endif } diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 813082b0d7..8cd546608e 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -417,4 +417,58 @@ struct VirtIOCryptoPCI { /* Virtio ABI version, if we increment this, we break the guest driver. */ #define VIRTIO_PCI_ABI_VERSION 0 +/* Input for virtio_pci_types_register() */ +typedef struct VirtioPCIDeviceTypeInfo { + /* + * Common base class for the subclasses below. + * + * Required only if transitional_name or non_transitional_name is set. + * + * We need a separate base type instead of making all types + * inherit from generic_name for two reasons: + * 1) generic_name implements INTERFACE_PCIE_DEVICE, but + * transitional_name does not. + * 2) generic_name has the "disable-legacy" and "disable-modern" + * properties, transitional_name and non_transitional name don't. + */ + const char *base_name; + /* + * Generic device type. Optional. + * + * Supports both transitional and non-transitional modes, + * using the disable-legacy and disable-modern properties. + * If disable-legacy=auto, (non-)transitional mode is selected + * depending on the bus where the device is plugged. + * + * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE, + * but PCI Express is supported only in non-transitional mode. + * + * The only type implemented by QEMU 3.1 and older. + */ + const char *generic_name; + /* + * The transitional device type. Optional. + * + * Implements both INTERFACE_PCIE_DEVICE and INTERFACE_CONVENTIONAL_PCI_DEVICE. + */ + const char *transitional_name; + /* + * The non-transitional device type. Optional. + * + * Implements INTERFACE_CONVENTIONAL_PCI_DEVICE only. + */ + const char *non_transitional_name; + + /* Parent type. If NULL, TYPE_VIRTIO_PCI is used */ + const char *parent; + + /* Same as TypeInfo fields: */ + size_t instance_size; + void (*instance_init)(Object *obj); + void (*class_init)(ObjectClass *klass, void *data); +} VirtioPCIDeviceTypeInfo; + +/* Register virtio-pci type(s). @t must be static. */ +void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); + #endif From f6e501a28ef9b69f6df6252160aa87876cc92a1a Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 5 Dec 2018 17:57:04 -0200 Subject: [PATCH 04/44] virtio: Provide version-specific variants of virtio PCI devices Many of the current virtio-*-pci device types actually represent 3 different types of devices: * virtio 1.0 non-transitional devices * virtio 1.0 transitional devices * virtio 0.9 ("legacy device" in virtio 1.0 terminology) That would be just an annoyance if it didn't break our device/bus compatibility QMP interfaces. With these multi-purpose device types, there's no way to tell management software that transitional devices and legacy devices require a Conventional PCI bus. The multi-purpose device types would also prevent us from telling management software what's the PCI vendor/device ID for them, because their PCI IDs change at runtime depending on the bus where they were plugged. This patch adds separate device types for each of those virtio device flavors: - virtio-*-pci: the existing multi-purpose device types - Configurable using `disable-legacy` and `disable-modern` properties - Legacy driver support is automatically enabled/disabled depending on the bus where it is plugged - Supports Conventional PCI and PCI Express buses (but Conventional PCI is incompatible with disable-legacy=off) - Changes PCI vendor/device IDs at runtime - virtio-*-pci-transitional: virtio-1.0 device supporting legacy drivers - Supports Conventional PCI buses only, because it has a PIO BAR - virtio-*-pci-non-transitional: modern-only - Supports both Conventional PCI and PCI Express buses The existing TYPE_* macros for these types will point to an abstract base type, so existing casts in the code will keep working for all variants. A simple test script (tests/acceptance/virtio_version.py) is included, to check if the new device types are equivalent to using the `disable-legacy` and `disable-modern` options. Acked-by: Andrea Bolognani Reviewed-by: Cornelia Huck Signed-off-by: Eduardo Habkost Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 60 ++++++++-- hw/virtio/virtio-pci.h | 24 ++-- tests/acceptance/virtio_version.py | 176 +++++++++++++++++++++++++++++ 3 files changed, 236 insertions(+), 24 deletions(-) create mode 100644 tests/acceptance/virtio_version.py diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index f07ec55c38..d05066deb8 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1120,7 +1120,10 @@ static void virtio_9p_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_9p_pci_info = { - .generic_name = TYPE_VIRTIO_9P_PCI, + .base_name = TYPE_VIRTIO_9P_PCI, + .generic_name = "virtio-9p-pci", + .transitional_name = "virtio-9p-pci-transitional", + .non_transitional_name = "virtio-9p-pci-non-transitional", .instance_size = sizeof(V9fsPCIState), .instance_init = virtio_9p_pci_instance_init, .class_init = virtio_9p_pci_class_init, @@ -2102,7 +2105,10 @@ static void virtio_blk_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_blk_pci_info = { - .generic_name = TYPE_VIRTIO_BLK_PCI, + .base_name = TYPE_VIRTIO_BLK_PCI, + .generic_name = "virtio-blk-pci", + .transitional_name = "virtio-blk-pci-transitional", + .non_transitional_name = "virtio-blk-pci-non-transitional", .instance_size = sizeof(VirtIOBlkPCI), .instance_init = virtio_blk_pci_instance_init, .class_init = virtio_blk_pci_class_init, @@ -2157,7 +2163,10 @@ static void vhost_user_blk_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo vhost_user_blk_pci_info = { - .generic_name = TYPE_VHOST_USER_BLK_PCI, + .base_name = TYPE_VHOST_USER_BLK_PCI, + .generic_name = "vhost-user-blk-pci", + .transitional_name = "vhost-user-blk-pci-transitional", + .non_transitional_name = "vhost-user-blk-pci-non-transitional", .instance_size = sizeof(VHostUserBlkPCI), .instance_init = vhost_user_blk_pci_instance_init, .class_init = vhost_user_blk_pci_class_init, @@ -2224,7 +2233,10 @@ static void virtio_scsi_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_scsi_pci_info = { - .generic_name = TYPE_VIRTIO_SCSI_PCI, + .base_name = TYPE_VIRTIO_SCSI_PCI, + .generic_name = "virtio-scsi-pci", + .transitional_name = "virtio-scsi-pci-transitional", + .non_transitional_name = "virtio-scsi-pci-non-transitional", .instance_size = sizeof(VirtIOSCSIPCI), .instance_init = virtio_scsi_pci_instance_init, .class_init = virtio_scsi_pci_class_init, @@ -2278,7 +2290,10 @@ static void vhost_scsi_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo vhost_scsi_pci_info = { - .generic_name = TYPE_VHOST_SCSI_PCI, + .base_name = TYPE_VHOST_SCSI_PCI, + .generic_name = "vhost-scsi-pci", + .transitional_name = "vhost-scsi-pci-transitional", + .non_transitional_name = "vhost-scsi-pci-non-transitional", .instance_size = sizeof(VHostSCSIPCI), .instance_init = vhost_scsi_pci_instance_init, .class_init = vhost_scsi_pci_class_init, @@ -2332,7 +2347,10 @@ static void vhost_user_scsi_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo vhost_user_scsi_pci_info = { - .generic_name = TYPE_VHOST_USER_SCSI_PCI, + .base_name = TYPE_VHOST_USER_SCSI_PCI, + .generic_name = "vhost-user-scsi-pci", + .transitional_name = "vhost-user-scsi-pci-transitional", + .non_transitional_name = "vhost-user-scsi-pci-non-transitional", .instance_size = sizeof(VHostUserSCSIPCI), .instance_init = vhost_user_scsi_pci_instance_init, .class_init = vhost_user_scsi_pci_class_init, @@ -2379,7 +2397,10 @@ static void vhost_vsock_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo vhost_vsock_pci_info = { - .generic_name = TYPE_VHOST_VSOCK_PCI, + .base_name = TYPE_VHOST_VSOCK_PCI, + .generic_name = "vhost-vsock-pci", + .transitional_name = "vhost-vsock-pci-transitional", + .non_transitional_name = "vhost-vsock-pci-non-transitional", .instance_size = sizeof(VHostVSockPCI), .instance_init = vhost_vsock_pci_instance_init, .class_init = vhost_vsock_pci_class_init, @@ -2435,7 +2456,10 @@ static void virtio_balloon_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_balloon_pci_info = { - .generic_name = TYPE_VIRTIO_BALLOON_PCI, + .base_name = TYPE_VIRTIO_BALLOON_PCI, + .generic_name = "virtio-balloon-pci", + .transitional_name = "virtio-balloon-pci-transitional", + .non_transitional_name = "virtio-balloon-pci-non-transitional", .instance_size = sizeof(VirtIOBalloonPCI), .instance_init = virtio_balloon_pci_instance_init, .class_init = virtio_balloon_pci_class_init, @@ -2507,7 +2531,10 @@ static void virtio_serial_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_serial_pci_info = { - .generic_name = TYPE_VIRTIO_SERIAL_PCI, + .base_name = TYPE_VIRTIO_SERIAL_PCI, + .generic_name = "virtio-serial-pci", + .transitional_name = "virtio-serial-pci-transitional", + .non_transitional_name = "virtio-serial-pci-non-transitional", .instance_size = sizeof(VirtIOSerialPCI), .instance_init = virtio_serial_pci_instance_init, .class_init = virtio_serial_pci_class_init, @@ -2561,7 +2588,10 @@ static void virtio_net_pci_instance_init(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_net_pci_info = { - .generic_name = TYPE_VIRTIO_NET_PCI, + .base_name = TYPE_VIRTIO_NET_PCI, + .generic_name = "virtio-net-pci", + .transitional_name = "virtio-net-pci-transitional", + .non_transitional_name = "virtio-net-pci-non-transitional", .instance_size = sizeof(VirtIONetPCI), .instance_init = virtio_net_pci_instance_init, .class_init = virtio_net_pci_class_init, @@ -2611,7 +2641,10 @@ static void virtio_rng_initfn(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_rng_pci_info = { - .generic_name = TYPE_VIRTIO_RNG_PCI, + .base_name = TYPE_VIRTIO_RNG_PCI, + .generic_name = "virtio-rng-pci", + .transitional_name = "virtio-rng-pci-transitional", + .non_transitional_name = "virtio-rng-pci-non-transitional", .instance_size = sizeof(VirtIORngPCI), .instance_init = virtio_rng_initfn, .class_init = virtio_rng_pci_class_init, @@ -2734,7 +2767,10 @@ static void virtio_host_initfn(Object *obj) } static const VirtioPCIDeviceTypeInfo virtio_host_pci_info = { - .generic_name = TYPE_VIRTIO_INPUT_HOST_PCI, + .base_name = TYPE_VIRTIO_INPUT_HOST_PCI, + .generic_name = "virtio-input-host-pci", + .transitional_name = "virtio-input-host-pci-transitional", + .non_transitional_name = "virtio-input-host-pci-non-transitional", .parent = TYPE_VIRTIO_INPUT_PCI, .instance_size = sizeof(VirtIOInputHostPCI), .instance_init = virtio_host_initfn, diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 8cd546608e..29b4216107 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -216,7 +216,7 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) /* * virtio-scsi-pci: This extends VirtioPCIProxy. */ -#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci" +#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci-base" #define VIRTIO_SCSI_PCI(obj) \ OBJECT_CHECK(VirtIOSCSIPCI, (obj), TYPE_VIRTIO_SCSI_PCI) @@ -229,7 +229,7 @@ struct VirtIOSCSIPCI { /* * vhost-scsi-pci: This extends VirtioPCIProxy. */ -#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci" +#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci-base" #define VHOST_SCSI_PCI(obj) \ OBJECT_CHECK(VHostSCSIPCI, (obj), TYPE_VHOST_SCSI_PCI) @@ -239,7 +239,7 @@ struct VHostSCSIPCI { }; #endif -#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci" +#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci-base" #define VHOST_USER_SCSI_PCI(obj) \ OBJECT_CHECK(VHostUserSCSIPCI, (obj), TYPE_VHOST_USER_SCSI_PCI) @@ -252,7 +252,7 @@ struct VHostUserSCSIPCI { /* * vhost-user-blk-pci: This extends VirtioPCIProxy. */ -#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci" +#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci-base" #define VHOST_USER_BLK_PCI(obj) \ OBJECT_CHECK(VHostUserBlkPCI, (obj), TYPE_VHOST_USER_BLK_PCI) @@ -265,7 +265,7 @@ struct VHostUserBlkPCI { /* * virtio-blk-pci: This extends VirtioPCIProxy. */ -#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci" +#define TYPE_VIRTIO_BLK_PCI "virtio-blk-pci-base" #define VIRTIO_BLK_PCI(obj) \ OBJECT_CHECK(VirtIOBlkPCI, (obj), TYPE_VIRTIO_BLK_PCI) @@ -277,7 +277,7 @@ struct VirtIOBlkPCI { /* * virtio-balloon-pci: This extends VirtioPCIProxy. */ -#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci" +#define TYPE_VIRTIO_BALLOON_PCI "virtio-balloon-pci-base" #define VIRTIO_BALLOON_PCI(obj) \ OBJECT_CHECK(VirtIOBalloonPCI, (obj), TYPE_VIRTIO_BALLOON_PCI) @@ -289,7 +289,7 @@ struct VirtIOBalloonPCI { /* * virtio-serial-pci: This extends VirtioPCIProxy. */ -#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci" +#define TYPE_VIRTIO_SERIAL_PCI "virtio-serial-pci-base" #define VIRTIO_SERIAL_PCI(obj) \ OBJECT_CHECK(VirtIOSerialPCI, (obj), TYPE_VIRTIO_SERIAL_PCI) @@ -301,7 +301,7 @@ struct VirtIOSerialPCI { /* * virtio-net-pci: This extends VirtioPCIProxy. */ -#define TYPE_VIRTIO_NET_PCI "virtio-net-pci" +#define TYPE_VIRTIO_NET_PCI "virtio-net-pci-base" #define VIRTIO_NET_PCI(obj) \ OBJECT_CHECK(VirtIONetPCI, (obj), TYPE_VIRTIO_NET_PCI) @@ -316,7 +316,7 @@ struct VirtIONetPCI { #ifdef CONFIG_VIRTFS -#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci" +#define TYPE_VIRTIO_9P_PCI "virtio-9p-pci-base" #define VIRTIO_9P_PCI(obj) \ OBJECT_CHECK(V9fsPCIState, (obj), TYPE_VIRTIO_9P_PCI) @@ -330,7 +330,7 @@ typedef struct V9fsPCIState { /* * virtio-rng-pci: This extends VirtioPCIProxy. */ -#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci" +#define TYPE_VIRTIO_RNG_PCI "virtio-rng-pci-base" #define VIRTIO_RNG_PCI(obj) \ OBJECT_CHECK(VirtIORngPCI, (obj), TYPE_VIRTIO_RNG_PCI) @@ -365,7 +365,7 @@ struct VirtIOInputHIDPCI { #ifdef CONFIG_LINUX -#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci" +#define TYPE_VIRTIO_INPUT_HOST_PCI "virtio-input-host-pci-base" #define VIRTIO_INPUT_HOST_PCI(obj) \ OBJECT_CHECK(VirtIOInputHostPCI, (obj), TYPE_VIRTIO_INPUT_HOST_PCI) @@ -392,7 +392,7 @@ struct VirtIOGPUPCI { /* * vhost-vsock-pci: This extends VirtioPCIProxy. */ -#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci" +#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci-base" #define VHOST_VSOCK_PCI(obj) \ OBJECT_CHECK(VHostVSockPCI, (obj), TYPE_VHOST_VSOCK_PCI) diff --git a/tests/acceptance/virtio_version.py b/tests/acceptance/virtio_version.py new file mode 100644 index 0000000000..ce990250d8 --- /dev/null +++ b/tests/acceptance/virtio_version.py @@ -0,0 +1,176 @@ +""" +Check compatibility of virtio device types +""" +# Copyright (c) 2018 Red Hat, Inc. +# +# Author: +# Eduardo Habkost +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. +import sys +import os + +sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "scripts")) +from qemu import QEMUMachine +from avocado_qemu import Test + +# Virtio Device IDs: +VIRTIO_NET = 1 +VIRTIO_BLOCK = 2 +VIRTIO_CONSOLE = 3 +VIRTIO_RNG = 4 +VIRTIO_BALLOON = 5 +VIRTIO_RPMSG = 7 +VIRTIO_SCSI = 8 +VIRTIO_9P = 9 +VIRTIO_RPROC_SERIAL = 11 +VIRTIO_CAIF = 12 +VIRTIO_GPU = 16 +VIRTIO_INPUT = 18 +VIRTIO_VSOCK = 19 +VIRTIO_CRYPTO = 20 + +PCI_VENDOR_ID_REDHAT_QUMRANET = 0x1af4 + +# Device IDs for legacy/transitional devices: +PCI_LEGACY_DEVICE_IDS = { + VIRTIO_NET: 0x1000, + VIRTIO_BLOCK: 0x1001, + VIRTIO_BALLOON: 0x1002, + VIRTIO_CONSOLE: 0x1003, + VIRTIO_SCSI: 0x1004, + VIRTIO_RNG: 0x1005, + VIRTIO_9P: 0x1009, + VIRTIO_VSOCK: 0x1012, +} + +def pci_modern_device_id(virtio_devid): + return virtio_devid + 0x1040 + +def devtype_implements(vm, devtype, implements): + return devtype in [d['name'] for d in vm.command('qom-list-types', implements=implements)] + +def get_pci_interfaces(vm, devtype): + interfaces = ('pci-express-device', 'conventional-pci-device') + return [i for i in interfaces if devtype_implements(vm, devtype, i)] + +class VirtioVersionCheck(Test): + """ + Check if virtio-version-specific device types result in the + same device tree created by `disable-modern` and + `disable-legacy`. + + :avocado: enable + :avocado: tags=x86_64 + """ + + # just in case there are failures, show larger diff: + maxDiff = 4096 + + def run_device(self, devtype, opts=None, machine='pc'): + """ + Run QEMU with `-device DEVTYPE`, return device info from `query-pci` + """ + with QEMUMachine(self.qemu_bin) as vm: + vm.set_machine(machine) + if opts: + devtype += ',' + opts + vm.add_args('-device', '%s,id=devfortest' % (devtype)) + vm.add_args('-S') + vm.launch() + + pcibuses = vm.command('query-pci') + alldevs = [dev for bus in pcibuses for dev in bus['devices']] + devfortest = [dev for dev in alldevs + if dev['qdev_id'] == 'devfortest'] + return devfortest[0], get_pci_interfaces(vm, devtype) + + + def assert_devids(self, dev, devid, non_transitional=False): + self.assertEqual(dev['id']['vendor'], PCI_VENDOR_ID_REDHAT_QUMRANET) + self.assertEqual(dev['id']['device'], devid) + if non_transitional: + self.assertTrue(0x1040 <= dev['id']['device'] <= 0x107f) + self.assertGreaterEqual(dev['id']['subsystem'], 0x40) + + def check_all_variants(self, qemu_devtype, virtio_devid): + """Check if a virtio device type and its variants behave as expected""" + # Force modern mode: + dev_modern, _ = self.run_device(qemu_devtype, + 'disable-modern=off,disable-legacy=on') + self.assert_devids(dev_modern, pci_modern_device_id(virtio_devid), + non_transitional=True) + + # -non-transitional device types should be 100% equivalent to + # ,disable-modern=off,disable-legacy=on + dev_1_0, nt_ifaces = self.run_device('%s-non-transitional' % (qemu_devtype)) + self.assertEqual(dev_modern, dev_1_0) + + # Force transitional mode: + dev_trans, _ = self.run_device(qemu_devtype, + 'disable-modern=off,disable-legacy=off') + self.assert_devids(dev_trans, PCI_LEGACY_DEVICE_IDS[virtio_devid]) + + # Force legacy mode: + dev_legacy, _ = self.run_device(qemu_devtype, + 'disable-modern=on,disable-legacy=off') + self.assert_devids(dev_legacy, PCI_LEGACY_DEVICE_IDS[virtio_devid]) + + # No options: default to transitional on PC machine-type: + no_opts_pc, generic_ifaces = self.run_device(qemu_devtype) + self.assertEqual(dev_trans, no_opts_pc) + + #TODO: check if plugging on a PCI Express bus will make the + # device non-transitional + #no_opts_q35 = self.run_device(qemu_devtype, machine='q35') + #self.assertEqual(dev_modern, no_opts_q35) + + # -transitional device types should be 100% equivalent to + # ,disable-modern=off,disable-legacy=off + dev_trans, trans_ifaces = self.run_device('%s-transitional' % (qemu_devtype)) + self.assertEqual(dev_trans, dev_trans) + + # ensure the interface information is correct: + self.assertIn('conventional-pci-device', generic_ifaces) + self.assertIn('pci-express-device', generic_ifaces) + + self.assertIn('conventional-pci-device', nt_ifaces) + self.assertIn('pci-express-device', nt_ifaces) + + self.assertIn('conventional-pci-device', trans_ifaces) + self.assertNotIn('pci-express-device', trans_ifaces) + + + def test_conventional_devs(self): + self.check_all_variants('virtio-net-pci', VIRTIO_NET) + # virtio-blk requires 'driver' parameter + #self.check_all_variants('virtio-blk-pci', VIRTIO_BLOCK) + self.check_all_variants('virtio-serial-pci', VIRTIO_CONSOLE) + self.check_all_variants('virtio-rng-pci', VIRTIO_RNG) + self.check_all_variants('virtio-balloon-pci', VIRTIO_BALLOON) + self.check_all_variants('virtio-scsi-pci', VIRTIO_SCSI) + # virtio-9p requires 'fsdev' parameter + #self.check_all_variants('virtio-9p-pci', VIRTIO_9P) + + def check_modern_only(self, qemu_devtype, virtio_devid): + """Check if a modern-only virtio device type behaves as expected""" + # Force modern mode: + dev_modern, _ = self.run_device(qemu_devtype, + 'disable-modern=off,disable-legacy=on') + self.assert_devids(dev_modern, pci_modern_device_id(virtio_devid), + non_transitional=True) + + # No options: should be modern anyway + dev_no_opts, ifaces = self.run_device(qemu_devtype) + self.assertEqual(dev_modern, dev_no_opts) + + self.assertIn('conventional-pci-device', ifaces) + self.assertIn('pci-express-device', ifaces) + + def test_modern_only_devs(self): + self.check_modern_only('virtio-vga', VIRTIO_GPU) + self.check_modern_only('virtio-gpu-pci', VIRTIO_GPU) + self.check_modern_only('virtio-mouse-pci', VIRTIO_INPUT) + self.check_modern_only('virtio-tablet-pci', VIRTIO_INPUT) + self.check_modern_only('virtio-keyboard-pci', VIRTIO_INPUT) From c7eaf9a08e989a0465643607ae26e8499f74c48c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 11 Dec 2018 17:34:03 +0100 Subject: [PATCH 05/44] tests: Remove unused include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "hw/smbios/smbios.h" include is not used, remove it. Reviewed-by: Laszlo Ersek Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/acpi-utils.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/acpi-utils.c b/tests/acpi-utils.c index 6dc8ca1a8c..7b444a7453 100644 --- a/tests/acpi-utils.c +++ b/tests/acpi-utils.c @@ -15,7 +15,6 @@ #include "qemu/osdep.h" #include #include "qemu-common.h" -#include "hw/smbios/smbios.h" #include "qemu/bitmap.h" #include "acpi-utils.h" #include "boot-sector.h" From cc4d4cefcc1bd36f360c4671256024c2662653e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 11 Dec 2018 17:34:04 +0100 Subject: [PATCH 06/44] hw/smbios: Restrict access to "hw/smbios/ipmi.h" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the consumers of "hw/smbios/ipmi.h" are located in hw/smbios/. There is no need to have this include publicly exposed, reduce the visibility by moving it in hw/smbios/. Reviewed-by: Laszlo Ersek Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/smbios/smbios.c | 2 +- include/hw/smbios/ipmi.h => hw/smbios/smbios_ipmi.h | 0 hw/smbios/smbios_type_38-stub.c | 2 +- hw/smbios/smbios_type_38.c | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename include/hw/smbios/ipmi.h => hw/smbios/smbios_ipmi.h (100%) diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 04811279a0..6fe5be3586 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -28,7 +28,7 @@ #include "hw/loader.h" #include "exec/cpu-common.h" #include "smbios_build.h" -#include "hw/smbios/ipmi.h" +#include "smbios_ipmi.h" /* legacy structures and constants for <= 2.0 machines */ struct smbios_header { diff --git a/include/hw/smbios/ipmi.h b/hw/smbios/smbios_ipmi.h similarity index 100% rename from include/hw/smbios/ipmi.h rename to hw/smbios/smbios_ipmi.h diff --git a/hw/smbios/smbios_type_38-stub.c b/hw/smbios/smbios_type_38-stub.c index 5b83c9b1f1..fc4516bc8a 100644 --- a/hw/smbios/smbios_type_38-stub.c +++ b/hw/smbios/smbios_type_38-stub.c @@ -8,7 +8,7 @@ */ #include "qemu/osdep.h" -#include "hw/smbios/ipmi.h" +#include "smbios_ipmi.h" void smbios_build_type_38_table(void) { diff --git a/hw/smbios/smbios_type_38.c b/hw/smbios/smbios_type_38.c index 56e8609c00..d84e87d608 100644 --- a/hw/smbios/smbios_type_38.c +++ b/hw/smbios/smbios_type_38.c @@ -9,10 +9,10 @@ #include "qemu/osdep.h" #include "hw/ipmi/ipmi.h" -#include "hw/smbios/ipmi.h" #include "hw/smbios/smbios.h" #include "qemu/error-report.h" #include "smbios_build.h" +#include "smbios_ipmi.h" /* SMBIOS type 38 - IPMI */ struct smbios_type_38 { From 5aca89d194f66ed9ac2dfd47185d65c7b1a9f1e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 11 Dec 2018 17:34:05 +0100 Subject: [PATCH 07/44] hw/smbios: Remove "smbios_ipmi.h" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This header only declare a single function: smbios_build_type_38_table(). We already have a header that declares such functions: "smbios_build.h". Move the declaration and remove the header. Reviewed-by: Corey Minyard Reviewed-by: Laszlo Ersek Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/smbios/smbios.c | 1 - hw/smbios/smbios_build.h | 4 ++++ hw/smbios/smbios_ipmi.h | 15 --------------- hw/smbios/smbios_type_38-stub.c | 2 +- hw/smbios/smbios_type_38.c | 1 - 5 files changed, 5 insertions(+), 18 deletions(-) delete mode 100644 hw/smbios/smbios_ipmi.h diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 6fe5be3586..4bff9b5ea4 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -28,7 +28,6 @@ #include "hw/loader.h" #include "exec/cpu-common.h" #include "smbios_build.h" -#include "smbios_ipmi.h" /* legacy structures and constants for <= 2.0 machines */ struct smbios_header { diff --git a/hw/smbios/smbios_build.h b/hw/smbios/smbios_build.h index 93b360d520..56b5a1e3f3 100644 --- a/hw/smbios/smbios_build.h +++ b/hw/smbios/smbios_build.h @@ -3,6 +3,7 @@ * * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. * Copyright (C) 2013 Red Hat, Inc. + * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC * * Authors: * Alex Williamson @@ -96,4 +97,7 @@ extern unsigned smbios_table_cnt; smbios_table_cnt++; \ } while (0) +/* IPMI SMBIOS firmware handling */ +void smbios_build_type_38_table(void); + #endif /* QEMU_SMBIOS_BUILD_H */ diff --git a/hw/smbios/smbios_ipmi.h b/hw/smbios/smbios_ipmi.h deleted file mode 100644 index 1c9aae38f2..0000000000 --- a/hw/smbios/smbios_ipmi.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * IPMI SMBIOS firmware handling - * - * Copyright (c) 2015,2016 Corey Minyard, MontaVista Software, LLC - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#ifndef QEMU_SMBIOS_IPMI_H -#define QEMU_SMBIOS_IPMI_H - -void smbios_build_type_38_table(void); - -#endif /* QEMU_SMBIOS_IPMI_H */ diff --git a/hw/smbios/smbios_type_38-stub.c b/hw/smbios/smbios_type_38-stub.c index fc4516bc8a..14b53d004b 100644 --- a/hw/smbios/smbios_type_38-stub.c +++ b/hw/smbios/smbios_type_38-stub.c @@ -8,7 +8,7 @@ */ #include "qemu/osdep.h" -#include "smbios_ipmi.h" +#include "smbios_build.h" void smbios_build_type_38_table(void) { diff --git a/hw/smbios/smbios_type_38.c b/hw/smbios/smbios_type_38.c index d84e87d608..a1ad28d059 100644 --- a/hw/smbios/smbios_type_38.c +++ b/hw/smbios/smbios_type_38.c @@ -12,7 +12,6 @@ #include "hw/smbios/smbios.h" #include "qemu/error-report.h" #include "smbios_build.h" -#include "smbios_ipmi.h" /* SMBIOS type 38 - IPMI */ struct smbios_type_38 { From a2eb5c0cf7cc77736219015b840c5299499b1357 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 11 Dec 2018 17:34:06 +0100 Subject: [PATCH 08/44] hw/smbios: Move to the hw/firmware/ subdirectory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SMBIOS is just another firmware interface used by some QEMU models. We will later introduce more firmware interfaces in this subdirectory. Reviewed-by: Laszlo Ersek Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 2 +- hw/arm/virt.c | 2 +- hw/i386/pc.c | 2 +- hw/i386/pc_piix.c | 2 +- hw/i386/pc_q35.c | 2 +- hw/smbios/smbios-stub.c | 2 +- hw/smbios/smbios.c | 2 +- hw/smbios/smbios_type_38.c | 2 +- include/hw/{smbios => firmware}/smbios.h | 0 tests/bios-tables-test.c | 2 +- vl.c | 2 +- 11 files changed, 10 insertions(+), 10 deletions(-) rename include/hw/{smbios => firmware}/smbios.h (100%) diff --git a/MAINTAINERS b/MAINTAINERS index d676c73f88..63fa6d4642 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1262,7 +1262,7 @@ M: Michael S. Tsirkin M: Igor Mammedov S: Supported F: include/hw/acpi/* -F: include/hw/smbios/* +F: include/hw/firmware/smbios.h F: hw/mem/* F: hw/acpi/* F: hw/smbios/* diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 5b678237b7..c2641e56ea 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -55,7 +55,7 @@ #include "hw/intc/arm_gic.h" #include "hw/intc/arm_gicv3_common.h" #include "kvm_arm.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "qapi/visitor.h" #include "standard-headers/linux/input.h" #include "hw/arm/smmuv3.h" diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 115bc2825c..470cc5daf9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -37,7 +37,7 @@ #include "hw/pci/pci_bus.h" #include "hw/nvram/fw_cfg.h" #include "hw/timer/hpet.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "hw/loader.h" #include "elf.h" #include "multiboot.h" diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 6981cfa740..e000c7511a 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -30,7 +30,7 @@ #include "hw/i386/pc.h" #include "hw/i386/apic.h" #include "hw/display/ramfb.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "hw/pci/pci.h" #include "hw/pci/pci_ids.h" #include "hw/usb.h" diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 58459bdab5..8836d21485 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -47,7 +47,7 @@ #include "hw/i386/amd_iommu.h" #include "hw/i386/intel_iommu.h" #include "hw/display/ramfb.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "hw/ide/pci.h" #include "hw/ide/ahci.h" #include "hw/usb.h" diff --git a/hw/smbios/smbios-stub.c b/hw/smbios/smbios-stub.c index d3a385441a..64e5ba93ec 100644 --- a/hw/smbios/smbios-stub.c +++ b/hw/smbios/smbios-stub.c @@ -23,7 +23,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qapi/qmp/qerror.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" void smbios_entry_add(QemuOpts *opts, Error **errp) { diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 4bff9b5ea4..818be8a838 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -24,7 +24,7 @@ #include "sysemu/sysemu.h" #include "qemu/uuid.h" #include "sysemu/cpus.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "hw/loader.h" #include "exec/cpu-common.h" #include "smbios_build.h" diff --git a/hw/smbios/smbios_type_38.c b/hw/smbios/smbios_type_38.c index a1ad28d059..0c08f282de 100644 --- a/hw/smbios/smbios_type_38.c +++ b/hw/smbios/smbios_type_38.c @@ -9,7 +9,7 @@ #include "qemu/osdep.h" #include "hw/ipmi/ipmi.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "qemu/error-report.h" #include "smbios_build.h" diff --git a/include/hw/smbios/smbios.h b/include/hw/firmware/smbios.h similarity index 100% rename from include/hw/smbios/smbios.h rename to include/hw/firmware/smbios.h diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index bac4b0171b..2b6004618f 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -13,7 +13,7 @@ #include "qemu/osdep.h" #include #include "qemu-common.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "qemu/bitmap.h" #include "acpi-utils.h" #include "boot-sector.h" diff --git a/vl.c b/vl.c index f5c8ef973c..46ebf813b3 100644 --- a/vl.c +++ b/vl.c @@ -61,7 +61,7 @@ int main(int argc, char **argv) #include "hw/display/vga.h" #include "hw/bt.h" #include "sysemu/watchdog.h" -#include "hw/smbios/smbios.h" +#include "hw/firmware/smbios.h" #include "hw/acpi/acpi.h" #include "hw/xen/xen.h" #include "hw/qdev.h" From e7176cdbe4d5e4b68459af741d6493886e4dad29 Mon Sep 17 00:00:00 2001 From: Matthias Weckbecker Date: Mon, 10 Dec 2018 14:00:48 +0100 Subject: [PATCH 09/44] hw/pci-bridge: Fix invalid free() When loadvm'ing a *running* snapshot qemu crashes due to an invalid free. It's fortunately caught early by glibc heap memory corruption protection and qemu gets killed with SIGABRT. Steps to reproduce: 1) Create VM (e.g w/ virsh define) 2) Start the VM and take a snapshot while it's running and having a PCI bridge attached 3) Destroy the VM and revert the running snapshot. This commit fixes the issue. Signed-off-by: Matthias Weckbecker Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci_bridge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index ee9dff2d3a..b9143ac88b 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -241,9 +241,9 @@ void pci_bridge_update_mappings(PCIBridge *br) * while another accesses an unaffected region. */ memory_region_transaction_begin(); pci_bridge_region_del(br, br->windows); + pci_bridge_region_cleanup(br, w); br->windows = pci_bridge_region_init(br); memory_region_transaction_commit(); - pci_bridge_region_cleanup(br, w); } /* default write_config function for PCI-to-PCI bridge */ From d96a0ac71c7a2332a00da1a496eae6bcb9948502 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 12 Dec 2018 12:38:41 -0700 Subject: [PATCH 10/44] pcie: Create enums for link speed and width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for reporting higher virtual link speeds and widths, create enums and macros to help us manage them. Cc: Marcel Apfelbaum Tested-by: Geoffrey McRae Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Eric Auger Signed-off-by: Alex Williamson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 7 ++++--- hw/vfio/pci.c | 3 ++- include/hw/pci/pcie_regs.h | 23 +++++++++++++++++++++-- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 66b73b87c8..aef84c665b 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -68,11 +68,12 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) pci_set_long(exp_cap + PCI_EXP_LNKCAP, (port << PCI_EXP_LNKCAP_PN_SHIFT) | PCI_EXP_LNKCAP_ASPMS_0S | - PCI_EXP_LNK_MLW_1 | - PCI_EXP_LNK_LS_25); + QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) | + QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT)); pci_set_word(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25); + QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1) | + QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT)); if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) { pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 5c7bd96984..74f9a46b4b 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1897,7 +1897,8 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, PCI_EXP_TYPE_ENDPOINT << 4, PCI_EXP_FLAGS_TYPE); vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCAP, - PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25, ~0); + QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) | + QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0); vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0); } diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h index a95522a13b..ad4e7808b8 100644 --- a/include/hw/pci/pcie_regs.h +++ b/include/hw/pci/pcie_regs.h @@ -34,10 +34,29 @@ /* PCI_EXP_LINK{CAP, STA} */ /* link speed */ -#define PCI_EXP_LNK_LS_25 1 +typedef enum PCIExpLinkSpeed { + QEMU_PCI_EXP_LNK_2_5GT = 1, + QEMU_PCI_EXP_LNK_5GT, + QEMU_PCI_EXP_LNK_8GT, + QEMU_PCI_EXP_LNK_16GT, +} PCIExpLinkSpeed; + +#define QEMU_PCI_EXP_LNKCAP_MLS(speed) (speed) +#define QEMU_PCI_EXP_LNKSTA_CLS QEMU_PCI_EXP_LNKCAP_MLS + +typedef enum PCIExpLinkWidth { + QEMU_PCI_EXP_LNK_X1 = 1, + QEMU_PCI_EXP_LNK_X2 = 2, + QEMU_PCI_EXP_LNK_X4 = 4, + QEMU_PCI_EXP_LNK_X8 = 8, + QEMU_PCI_EXP_LNK_X12 = 12, + QEMU_PCI_EXP_LNK_X16 = 16, + QEMU_PCI_EXP_LNK_X32 = 32, +} PCIExpLinkWidth; #define PCI_EXP_LNK_MLW_SHIFT ctz32(PCI_EXP_LNKCAP_MLW) -#define PCI_EXP_LNK_MLW_1 (1 << PCI_EXP_LNK_MLW_SHIFT) +#define QEMU_PCI_EXP_LNKCAP_MLW(width) (width << PCI_EXP_LNK_MLW_SHIFT) +#define QEMU_PCI_EXP_LNKSTA_NLW QEMU_PCI_EXP_LNKCAP_MLW /* PCI_EXP_LINKCAP */ #define PCI_EXP_LNKCAP_ASPMS_SHIFT ctz32(PCI_EXP_LNKCAP_ASPMS) From 727b48661f757678f7f42f557ddac072c5a49721 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 12 Dec 2018 12:38:55 -0700 Subject: [PATCH 11/44] pci: Sync PCIe downstream port LNKSTA on read The PCIe link speed and width between a downstream device and its upstream port is negotiated on real hardware and susceptible to dynamic changes due to signal issues and power management. In the emulated device case there is no real hardware link, but we still might wish to have some consistency between endpoint and downstream port via a virtual negotiation. There is of course a real link for assigned devices and this same virtual negotiation allows the downstream port to match the endpoint, synchronizing on every read to support underlying physical hardware dynamically adjusting the link. This negotiation is intentionally unidirectional for compatibility. If the endpoint exceeds the capabilities of the downstream port or there is no endpoint device, the downstream port reports negotiation to its maximum speed and width, matching the previous case where negotiation was absent. De-tuning the endpoint to match a virtual link doesn't seem to benefit anyone and is a condition we've thus far reported without functional issues. Note that PCI_EXP_LNKSTA is already ignored for migration compatibility via pcie_cap_v1_fill(). Cc: Marcel Apfelbaum Tested-by: Geoffrey McRae Reviewed-by: Eric Auger Signed-off-by: Alex Williamson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci.c | 4 ++++ hw/pci/pcie.c | 39 +++++++++++++++++++++++++++++++++++++++ include/hw/pci/pci.h | 13 +++++++++++++ include/hw/pci/pcie.h | 1 + 4 files changed, 57 insertions(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index efb5ce196f..d831fa0a36 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1353,6 +1353,10 @@ uint32_t pci_default_read_config(PCIDevice *d, { uint32_t val = 0; + if (pci_is_express_downstream_port(d) && + ranges_overlap(address, len, d->exp.exp_cap + PCI_EXP_LNKSTA, 2)) { + pcie_sync_bridge_lnk(d); + } memcpy(&val, d->config + address, len); return le32_to_cpu(val); } diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index aef84c665b..6891deb711 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -741,6 +741,45 @@ void pcie_add_capability(PCIDevice *dev, memset(dev->cmask + offset, 0xFF, size); } +/* + * Sync the PCIe Link Status negotiated speed and width of a bridge with the + * downstream device. If downstream device is not present, re-write with the + * Link Capability fields. Limit width and speed to bridge capabilities for + * compatibility. Use config_read to access the downstream device since it + * could be an assigned device with volatile link information. + */ +void pcie_sync_bridge_lnk(PCIDevice *bridge_dev) +{ + PCIBridge *br = PCI_BRIDGE(bridge_dev); + PCIBus *bus = pci_bridge_get_sec_bus(br); + PCIDevice *target = bus->devices[0]; + uint8_t *exp_cap = bridge_dev->config + bridge_dev->exp.exp_cap; + uint16_t lnksta, lnkcap = pci_get_word(exp_cap + PCI_EXP_LNKCAP); + + if (!target || !target->exp.exp_cap) { + lnksta = lnkcap; + } else { + lnksta = target->config_read(target, + target->exp.exp_cap + PCI_EXP_LNKSTA, + sizeof(lnksta)); + + if ((lnksta & PCI_EXP_LNKSTA_NLW) > (lnkcap & PCI_EXP_LNKCAP_MLW)) { + lnksta &= ~PCI_EXP_LNKSTA_NLW; + lnksta |= lnkcap & PCI_EXP_LNKCAP_MLW; + } + + if ((lnksta & PCI_EXP_LNKSTA_CLS) > (lnkcap & PCI_EXP_LNKCAP_SLS)) { + lnksta &= ~PCI_EXP_LNKSTA_CLS; + lnksta |= lnkcap & PCI_EXP_LNKCAP_SLS; + } + } + + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, lnksta & + (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW)); +} + /************************************************************************** * pci express extended capability helper functions */ diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index e6514bba23..eb12fa112e 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -737,6 +737,19 @@ static inline int pci_is_express(const PCIDevice *d) return d->cap_present & QEMU_PCI_CAP_EXPRESS; } +static inline int pci_is_express_downstream_port(const PCIDevice *d) +{ + uint8_t type; + + if (!pci_is_express(d) || !d->exp.exp_cap) { + return 0; + } + + type = pcie_cap_get_type(d); + + return type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_ROOT_PORT; +} + static inline uint32_t pci_config_size(const PCIDevice *d) { return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE; diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index b71e369703..1976909ab4 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -126,6 +126,7 @@ uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id); void pcie_add_capability(PCIDevice *dev, uint16_t cap_id, uint8_t cap_ver, uint16_t offset, uint16_t size); +void pcie_sync_bridge_lnk(PCIDevice *dev); void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn); void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num); From 4695a2c50076879000ddde9f80d07bbcacfa0f26 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 12 Dec 2018 12:39:08 -0700 Subject: [PATCH 12/44] qapi: Define PCIe link speed and width properties Create properties to be able to define speeds and widths for PCIe links. The only tricky bit here is that our get and set callbacks translate from the fixed QAPI automagic enums to those we define in PCI code to represent the actual register segment value. Cc: Eric Blake Tested-by: Geoffrey McRae Reviewed-by: Markus Armbruster Signed-off-by: Alex Williamson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/core/qdev-properties.c | 176 +++++++++++++++++++++++++++++++++++ include/hw/qdev-properties.h | 8 ++ qapi/common.json | 42 +++++++++ 3 files changed, 226 insertions(+) diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index bd84c4ea4c..943dc2654b 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -1297,3 +1297,179 @@ const PropertyInfo qdev_prop_off_auto_pcibar = { .set = set_enum, .set_default_value = set_default_value_enum, }; + +/* --- PCIELinkSpeed 2_5/5/8/16 -- */ + +static void get_prop_pcielinkspeed(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + PCIExpLinkSpeed *p = qdev_get_prop_ptr(dev, prop); + int speed; + + switch (*p) { + case QEMU_PCI_EXP_LNK_2_5GT: + speed = PCIE_LINK_SPEED_2_5; + break; + case QEMU_PCI_EXP_LNK_5GT: + speed = PCIE_LINK_SPEED_5; + break; + case QEMU_PCI_EXP_LNK_8GT: + speed = PCIE_LINK_SPEED_8; + break; + case QEMU_PCI_EXP_LNK_16GT: + speed = PCIE_LINK_SPEED_16; + break; + default: + /* Unreachable */ + abort(); + } + + visit_type_enum(v, prop->name, &speed, prop->info->enum_table, errp); +} + +static void set_prop_pcielinkspeed(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + PCIExpLinkSpeed *p = qdev_get_prop_ptr(dev, prop); + int speed; + Error *local_err = NULL; + + if (dev->realized) { + qdev_prop_set_after_realize(dev, name, errp); + return; + } + + visit_type_enum(v, prop->name, &speed, prop->info->enum_table, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + switch (speed) { + case PCIE_LINK_SPEED_2_5: + *p = QEMU_PCI_EXP_LNK_2_5GT; + break; + case PCIE_LINK_SPEED_5: + *p = QEMU_PCI_EXP_LNK_5GT; + break; + case PCIE_LINK_SPEED_8: + *p = QEMU_PCI_EXP_LNK_8GT; + break; + case PCIE_LINK_SPEED_16: + *p = QEMU_PCI_EXP_LNK_16GT; + break; + default: + /* Unreachable */ + abort(); + } +} + +const PropertyInfo qdev_prop_pcie_link_speed = { + .name = "PCIELinkSpeed", + .description = "2_5/5/8/16", + .enum_table = &PCIELinkSpeed_lookup, + .get = get_prop_pcielinkspeed, + .set = set_prop_pcielinkspeed, + .set_default_value = set_default_value_enum, +}; + +/* --- PCIELinkWidth 1/2/4/8/12/16/32 -- */ + +static void get_prop_pcielinkwidth(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + PCIExpLinkWidth *p = qdev_get_prop_ptr(dev, prop); + int width; + + switch (*p) { + case QEMU_PCI_EXP_LNK_X1: + width = PCIE_LINK_WIDTH_1; + break; + case QEMU_PCI_EXP_LNK_X2: + width = PCIE_LINK_WIDTH_2; + break; + case QEMU_PCI_EXP_LNK_X4: + width = PCIE_LINK_WIDTH_4; + break; + case QEMU_PCI_EXP_LNK_X8: + width = PCIE_LINK_WIDTH_8; + break; + case QEMU_PCI_EXP_LNK_X12: + width = PCIE_LINK_WIDTH_12; + break; + case QEMU_PCI_EXP_LNK_X16: + width = PCIE_LINK_WIDTH_16; + break; + case QEMU_PCI_EXP_LNK_X32: + width = PCIE_LINK_WIDTH_32; + break; + default: + /* Unreachable */ + abort(); + } + + visit_type_enum(v, prop->name, &width, prop->info->enum_table, errp); +} + +static void set_prop_pcielinkwidth(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + PCIExpLinkWidth *p = qdev_get_prop_ptr(dev, prop); + int width; + Error *local_err = NULL; + + if (dev->realized) { + qdev_prop_set_after_realize(dev, name, errp); + return; + } + + visit_type_enum(v, prop->name, &width, prop->info->enum_table, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + switch (width) { + case PCIE_LINK_WIDTH_1: + *p = QEMU_PCI_EXP_LNK_X1; + break; + case PCIE_LINK_WIDTH_2: + *p = QEMU_PCI_EXP_LNK_X2; + break; + case PCIE_LINK_WIDTH_4: + *p = QEMU_PCI_EXP_LNK_X4; + break; + case PCIE_LINK_WIDTH_8: + *p = QEMU_PCI_EXP_LNK_X8; + break; + case PCIE_LINK_WIDTH_12: + *p = QEMU_PCI_EXP_LNK_X12; + break; + case PCIE_LINK_WIDTH_16: + *p = QEMU_PCI_EXP_LNK_X16; + break; + case PCIE_LINK_WIDTH_32: + *p = QEMU_PCI_EXP_LNK_X32; + break; + default: + /* Unreachable */ + abort(); + } +} + +const PropertyInfo qdev_prop_pcie_link_width = { + .name = "PCIELinkWidth", + .description = "1/2/4/8/12/16/32", + .enum_table = &PCIELinkWidth_lookup, + .get = get_prop_pcielinkwidth, + .set = set_prop_pcielinkwidth, + .set_default_value = set_default_value_enum, +}; diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 3ab9cd2eb6..b6758c852e 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -36,6 +36,8 @@ extern const PropertyInfo qdev_prop_uuid; extern const PropertyInfo qdev_prop_arraylen; extern const PropertyInfo qdev_prop_link; extern const PropertyInfo qdev_prop_off_auto_pcibar; +extern const PropertyInfo qdev_prop_pcie_link_speed; +extern const PropertyInfo qdev_prop_pcie_link_width; #define DEFINE_PROP(_name, _state, _field, _prop, _type) { \ .name = (_name), \ @@ -217,6 +219,12 @@ extern const PropertyInfo qdev_prop_off_auto_pcibar; #define DEFINE_PROP_OFF_AUTO_PCIBAR(_n, _s, _f, _d) \ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_off_auto_pcibar, \ OffAutoPCIBAR) +#define DEFINE_PROP_PCIE_LINK_SPEED(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_speed, \ + PCIExpLinkSpeed) +#define DEFINE_PROP_PCIE_LINK_WIDTH(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pcie_link_width, \ + PCIExpLinkWidth) #define DEFINE_PROP_UUID(_name, _state, _field) { \ .name = (_name), \ diff --git a/qapi/common.json b/qapi/common.json index 021174f04e..99d313ef3b 100644 --- a/qapi/common.json +++ b/qapi/common.json @@ -127,6 +127,48 @@ { 'enum': 'OffAutoPCIBAR', 'data': [ 'off', 'auto', 'bar0', 'bar1', 'bar2', 'bar3', 'bar4', 'bar5' ] } +## +# @PCIELinkSpeed: +# +# An enumeration of PCIe link speeds in units of GT/s +# +# @2_5: 2.5GT/s +# +# @5: 5.0GT/s +# +# @8: 8.0GT/s +# +# @16: 16.0GT/s +# +# Since: 4.0 +## +{ 'enum': 'PCIELinkSpeed', + 'data': [ '2_5', '5', '8', '16' ] } + +## +# @PCIELinkWidth: +# +# An enumeration of PCIe link width +# +# @1: x1 +# +# @2: x2 +# +# @4: x4 +# +# @8: x8 +# +# @12: x12 +# +# @16: x16 +# +# @32: x32 +# +# Since: 4.0 +## +{ 'enum': 'PCIELinkWidth', + 'data': [ '1', '2', '4', '8', '12', '16', '32' ] } + ## # @SysEmuTarget: # From ea8cfdb5d19af45f98abe02844c7963dafec6e92 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 12 Dec 2018 12:39:16 -0700 Subject: [PATCH 13/44] pcie: Add link speed and width fields to PCIESlot Add fields allowing the PCIe link speed and width of a PCIESlot to be configured, with an instance_post_init callback on the root port parent class to set defaults. This allows child classes to set these via properties or via their own instance_init callback, without requiring all implementions to support arbitrary user selected values. Cc: Marcel Apfelbaum Tested-by: Geoffrey McRae Reviewed-by: Eric Auger Signed-off-by: Alex Williamson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/pcie_root_port.c | 14 ++++++++++++++ include/hw/pci/pcie_port.h | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c index 45f9e8cd4a..34ad76743c 100644 --- a/hw/pci-bridge/pcie_root_port.c +++ b/hw/pci-bridge/pcie_root_port.c @@ -140,6 +140,19 @@ static Property rp_props[] = { DEFINE_PROP_END_OF_LIST() }; +static void rp_instance_post_init(Object *obj) +{ + PCIESlot *s = PCIE_SLOT(obj); + + if (!s->speed) { + s->speed = QEMU_PCI_EXP_LNK_2_5GT; + } + + if (!s->width) { + s->width = QEMU_PCI_EXP_LNK_X1; + } +} + static void rp_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -157,6 +170,7 @@ static void rp_class_init(ObjectClass *klass, void *data) static const TypeInfo rp_info = { .name = TYPE_PCIE_ROOT_PORT, .parent = TYPE_PCIE_SLOT, + .instance_post_init = rp_instance_post_init, .class_init = rp_class_init, .abstract = true, .class_size = sizeof(PCIERootPortClass), diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h index 0736014bfd..df242a0caf 100644 --- a/include/hw/pci/pcie_port.h +++ b/include/hw/pci/pcie_port.h @@ -49,6 +49,10 @@ struct PCIESlot { /* pci express switch port with slot */ uint8_t chassis; uint16_t slot; + + PCIExpLinkSpeed speed; + PCIExpLinkWidth width; + QLIST_ENTRY(PCIESlot) next; }; From 3d67447fe7c295c7da75399b63e37cf0372509eb Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 12 Dec 2018 12:39:31 -0700 Subject: [PATCH 14/44] pcie: Fill PCIESlot link fields to support higher speeds and widths Make use of the PCIESlot speed and width fields to update link information beyond those configured in pcie_cap_v1_fill(). This is only called for devices supporting a version 2 capability and automatically skips any non-PCIESlot devices. Only devices with increased link values generate any visible config space differences. Cc: Marcel Apfelbaum Tested-by: Geoffrey McRae Signed-off-by: Alex Williamson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 6891deb711..d91a615193 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -27,6 +27,7 @@ #include "hw/pci/msi.h" #include "hw/pci/pci_bus.h" #include "hw/pci/pcie_regs.h" +#include "hw/pci/pcie_port.h" #include "qemu/range.h" //#define DEBUG_PCIE @@ -87,6 +88,76 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, uint8_t version) pci_set_word(cmask + PCI_EXP_LNKSTA, 0); } +static void pcie_cap_fill_slot_lnk(PCIDevice *dev) +{ + PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT); + uint8_t *exp_cap = dev->config + dev->exp.exp_cap; + + /* Skip anything that isn't a PCIESlot */ + if (!s) { + return; + } + + /* Clear and fill LNKCAP from what was configured above */ + pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + QEMU_PCI_EXP_LNKCAP_MLW(s->width) | + QEMU_PCI_EXP_LNKCAP_MLS(s->speed)); + + /* + * Link bandwidth notification is required for all root ports and + * downstream ports supporting links wider than x1 or multiple link + * speeds. + */ + if (s->width > QEMU_PCI_EXP_LNK_X1 || + s->speed > QEMU_PCI_EXP_LNK_2_5GT) { + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_LBNC); + } + + if (s->speed > QEMU_PCI_EXP_LNK_2_5GT) { + /* + * Hot-plug capable downstream ports and downstream ports supporting + * link speeds greater than 5GT/s must hardwire PCI_EXP_LNKCAP_DLLLARC + * to 1b. PCI_EXP_LNKCAP_DLLLARC implies PCI_EXP_LNKSTA_DLLLA, which + * we also hardwire to 1b here. 2.5GT/s hot-plug slots should also + * technically implement this, but it's not done here for compatibility. + */ + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_DLLLARC); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); + + /* + * Target Link Speed defaults to the highest link speed supported by + * the component. 2.5GT/s devices are permitted to hardwire to zero. + */ + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS); + pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKCTL2, + QEMU_PCI_EXP_LNKCAP_MLS(s->speed) & + PCI_EXP_LNKCTL2_TLS); + } + + /* + * 2.5 & 5.0GT/s can be fully described by LNKCAP, but 8.0GT/s is + * actually a reference to the highest bit supported in this register. + * We assume the device supports all link speeds. + */ + if (s->speed > QEMU_PCI_EXP_LNK_5GT) { + pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP2, ~0U); + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, + PCI_EXP_LNKCAP2_SLS_2_5GB | + PCI_EXP_LNKCAP2_SLS_5_0GB | + PCI_EXP_LNKCAP2_SLS_8_0GB); + if (s->speed > QEMU_PCI_EXP_LNK_8GT) { + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2, + PCI_EXP_LNKCAP2_SLS_16_0GB); + } + } +} + int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port, Error **errp) @@ -108,6 +179,9 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset, /* Filling values common with v1 */ pcie_cap_v1_fill(dev, port, type, PCI_EXP_FLAGS_VER2); + /* Fill link speed and width options */ + pcie_cap_fill_slot_lnk(dev); + /* Filling v2 specific values */ pci_set_long(exp_cap + PCI_EXP_DEVCAP2, PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP); From c2a490e344b4e231cf9488c67df7ee46977b1ebe Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 12 Dec 2018 12:39:43 -0700 Subject: [PATCH 15/44] pcie: Allow generic PCIe root port to specify link speed and width Allow users to experimentally specify speed and width values for the generic PCIe root port. Defaults remain at 2.5GT/s & x1 for compatiblity with the intent to only support changing defaults via machine types for now. Note for libvirt testing that pcie-root-port controllers are given default names like "pci.7" which don't play well with using the "-set device.$name.$prop=$value" options accessible to us via options. The solution is to add an to the pcie-root-port , for example:
The "ua-" here is a mandatory prefix. We can then use: or, without an alias, set globals such as: Cc: Marcel Apfelbaum Tested-by: Geoffrey McRae Reviewed-by: Eric Auger Signed-off-by: Alex Williamson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/gen_pcie_root_port.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c index 299de429ec..ca5418a89d 100644 --- a/hw/pci-bridge/gen_pcie_root_port.c +++ b/hw/pci-bridge/gen_pcie_root_port.c @@ -124,6 +124,10 @@ static Property gen_rp_props[] = { res_reserve.mem_pref_32, -1), DEFINE_PROP_SIZE("pref64-reserve", GenPCIERootPort, res_reserve.mem_pref_64, -1), + DEFINE_PROP_PCIE_LINK_SPEED("x-speed", PCIESlot, + speed, PCIE_LINK_SPEED_2_5), + DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot, + width, PCIE_LINK_WIDTH_1), DEFINE_PROP_END_OF_LIST() }; From d26e543891068254c4575567a31280be5881e49d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 12 Dec 2018 12:39:56 -0700 Subject: [PATCH 16/44] vfio/pci: Remove PCIe Link Status emulation Now that the downstream port will virtually negotiate itself to the link status of the downstream device, we can remove this emulation. It's not clear that it was every terribly useful anyway. Tested-by: Geoffrey McRae Reviewed-by: Eric Auger Signed-off-by: Alex Williamson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/vfio/pci.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 74f9a46b4b..c0cb1ec289 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1901,12 +1901,6 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), ~0); vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0); } - - /* Mark the Link Status bits as emulated to allow virtual negotiation */ - vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKSTA, - pci_get_word(vdev->pdev.config + pos + - PCI_EXP_LNKSTA), - PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); } /* From a09d2038cc5f8e45e2126461e5fb1eb9f4874be3 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 12 Dec 2018 12:40:09 -0700 Subject: [PATCH 17/44] pcie: Fast PCIe root ports for new machines Change the default speed and width for new machine types to the fastest and widest currently supported. This should be compatible to the PCIe 4.0 spec. Pre-QEMU-4.0 machine types remain at 2.5GT/s, x1 width. Cc: Marcel Apfelbaum Reviewed-by: Eric Auger Signed-off-by: Alex Williamson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/gen_pcie_root_port.c | 4 ++-- include/hw/compat.h | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c index ca5418a89d..9766edb445 100644 --- a/hw/pci-bridge/gen_pcie_root_port.c +++ b/hw/pci-bridge/gen_pcie_root_port.c @@ -125,9 +125,9 @@ static Property gen_rp_props[] = { DEFINE_PROP_SIZE("pref64-reserve", GenPCIERootPort, res_reserve.mem_pref_64, -1), DEFINE_PROP_PCIE_LINK_SPEED("x-speed", PCIESlot, - speed, PCIE_LINK_SPEED_2_5), + speed, PCIE_LINK_SPEED_16), DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot, - width, PCIE_LINK_WIDTH_1), + width, PCIE_LINK_WIDTH_32), DEFINE_PROP_END_OF_LIST() }; diff --git a/include/hw/compat.h b/include/hw/compat.h index 70958328fe..3ca85b037c 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -2,7 +2,15 @@ #define HW_COMPAT_H #define HW_COMPAT_3_1 \ - /* empty */ + {\ + .driver = "pcie-root-port",\ + .property = "x-speed",\ + .value = "2_5",\ + },{\ + .driver = "pcie-root-port",\ + .property = "x-width",\ + .value = "1",\ + }, #define HW_COMPAT_3_0 \ /* empty */ From 662b4b69bac99a5cf5f71f2bba7b688d7608250b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 17 Dec 2018 15:31:10 +0800 Subject: [PATCH 18/44] intel_iommu: dump correct iova when failed The iotlb.iova can be zero if failure really happened. Dump the addr instead. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index d97bcbc2f7..f21988f396 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2540,7 +2540,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr, __func__, pci_bus_num(vtd_as->bus), VTD_PCI_SLOT(vtd_as->devfn), VTD_PCI_FUNC(vtd_as->devfn), - iotlb.iova); + addr); } return iotlb; From 095955b24d25715f38b78703dc0a295761bffaca Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 17 Dec 2018 15:31:11 +0800 Subject: [PATCH 19/44] intel_iommu: convert invalid traces into error reports Report more *_invalid() tracepoints to error_report_once() so that we can detect issues even without tracing enabled. Drop those tracepoints. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 58 ++++++++++++++++++++++++++++++++----------- hw/i386/trace-events | 6 ----- 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index f21988f396..4806d7edb4 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -524,7 +524,6 @@ static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index, addr = s->root + index * sizeof(*re); if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) { - trace_vtd_re_invalid(re->rsvd, re->val); re->val = 0; return -VTD_FR_ROOT_TABLE_INV; } @@ -545,7 +544,6 @@ static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index, /* we have checked that root entry is present */ addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce); if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) { - trace_vtd_re_invalid(root->rsvd, root->val); return -VTD_FR_CONTEXT_TABLE_INV; } ce->lo = le64_to_cpu(ce->lo); @@ -630,16 +628,20 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu, break; case VTD_CONTEXT_TT_DEV_IOTLB: if (!x86_iommu->dt_supported) { + error_report_once("%s: DT specified but not supported", __func__); return false; } break; case VTD_CONTEXT_TT_PASS_THROUGH: if (!x86_iommu->pt_supported) { + error_report_once("%s: PT specified but not supported", __func__); return false; } break; default: /* Unknwon type */ + error_report_once("%s: unknown ce type: %"PRIu32, __func__, + vtd_ce_get_type(ce)); return false; } return true; @@ -1003,7 +1005,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, } if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) { - trace_vtd_re_invalid(re.rsvd, re.val); + error_report_once("%s: invalid root entry: rsvd=0x%"PRIx64 + ", val=0x%"PRIx64" (reserved nonzero)", + __func__, re.rsvd, re.val); return -VTD_FR_ROOT_ENTRY_RSVD; } @@ -1020,19 +1024,23 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) { - trace_vtd_ce_invalid(ce->hi, ce->lo); + error_report_once("%s: invalid context entry: hi=%"PRIx64 + ", lo=%"PRIx64" (reserved nonzero)", + __func__, ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_RSVD; } /* Check if the programming of context-entry is valid */ if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) { - trace_vtd_ce_invalid(ce->hi, ce->lo); + error_report_once("%s: invalid context entry: hi=%"PRIx64 + ", lo=%"PRIx64" (level %d not supported)", + __func__, ce->hi, ce->lo, vtd_ce_get_level(ce)); return -VTD_FR_CONTEXT_ENTRY_INV; } /* Do translation type check */ if (!vtd_ce_type_check(x86_iommu, ce)) { - trace_vtd_ce_invalid(ce->hi, ce->lo); + /* Errors dumped in vtd_ce_type_check() */ return -VTD_FR_CONTEXT_ENTRY_INV; } @@ -1878,7 +1886,9 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) || (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) { - trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64 + " (reserved nonzero)", __func__, inv_desc->hi, + inv_desc->lo); return false; } if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) { @@ -1901,7 +1911,9 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) /* Interrupt flag */ vtd_generate_completion_event(s); } else { - trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64 + " (unknown type)", __func__, inv_desc->hi, + inv_desc->lo); return false; } return true; @@ -1913,7 +1925,9 @@ static bool vtd_process_context_cache_desc(IntelIOMMUState *s, uint16_t sid, fmask; if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) { - trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid cc inv desc: hi=%"PRIx64", lo=%"PRIx64 + " (reserved nonzero)", __func__, inv_desc->hi, + inv_desc->lo); return false; } switch (inv_desc->lo & VTD_INV_DESC_CC_G) { @@ -1932,7 +1946,9 @@ static bool vtd_process_context_cache_desc(IntelIOMMUState *s, break; default: - trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid cc inv desc: hi=%"PRIx64", lo=%"PRIx64 + " (invalid type)", __func__, inv_desc->hi, + inv_desc->lo); return false; } return true; @@ -1946,7 +1962,9 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) || (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) { - trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64 + ", lo=0x%"PRIx64" (reserved bits unzero)\n", + __func__, inv_desc->hi, inv_desc->lo); return false; } @@ -1965,14 +1983,20 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi); am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi); if (am > VTD_MAMV) { - trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64 + ", lo=0x%"PRIx64" (am=%u > VTD_MAMV=%u)\n", + __func__, inv_desc->hi, inv_desc->lo, + am, (unsigned)VTD_MAMV); return false; } vtd_iotlb_page_invalidate(s, domain_id, addr, am); break; default: - trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid iotlb inv desc: hi=0x%"PRIx64 + ", lo=0x%"PRIx64" (type mismatch: 0x%llx)\n", + __func__, inv_desc->hi, inv_desc->lo, + inv_desc->lo & VTD_INV_DESC_IOTLB_G); return false; } return true; @@ -2012,7 +2036,9 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) || (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) { - trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo); + error_report_once("%s: invalid dev-iotlb inv desc: hi=%"PRIx64 + ", lo=%"PRIx64" (reserved nonzero)", __func__, + inv_desc->hi, inv_desc->lo); return false; } @@ -2103,7 +2129,9 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) break; default: - trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo); + error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64 + " (unknown type)", __func__, inv_desc.hi, + inv_desc.lo); return false; } s->iq_head++; diff --git a/hw/i386/trace-events b/hw/i386/trace-events index 6ac347d18c..77244fc384 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -5,19 +5,15 @@ x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC inv # hw/i386/intel_iommu.c vtd_inv_desc(const char *type, uint64_t hi, uint64_t lo) "invalidate desc type %s high 0x%"PRIx64" low 0x%"PRIx64 -vtd_inv_desc_invalid(uint64_t hi, uint64_t lo) "invalid inv desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_cc_domain(uint16_t domain) "context invalidate domain 0x%"PRIx16 vtd_inv_desc_cc_global(void) "context invalidate globally" vtd_inv_desc_cc_device(uint8_t bus, uint8_t dev, uint8_t fn) "context invalidate device %02"PRIx8":%02"PRIx8".%02"PRIx8 vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate devices sid 0x%"PRIx16" fmask 0x%"PRIx16 -vtd_inv_desc_cc_invalid(uint64_t hi, uint64_t lo) "invalid context-cache desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_iotlb_global(void) "iotlb invalidate global" vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16 vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8 -vtd_inv_desc_iotlb_invalid(uint64_t hi, uint64_t lo) "invalid iotlb desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32 vtd_inv_desc_wait_irq(const char *msg) "%s" -vtd_inv_desc_wait_invalid(uint64_t hi, uint64_t lo) "invalid wait desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_inv_desc_iec(uint32_t granularity, uint32_t index, uint32_t mask) "granularity 0x%"PRIx32" index 0x%"PRIx32" mask 0x%"PRIx32 vtd_inv_qi_enable(bool enable) "enabled %d" @@ -27,9 +23,7 @@ vtd_inv_qi_tail(uint16_t head) "write tail %d" vtd_inv_qi_fetch(void) "" vtd_context_cache_reset(void) "" vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present" -vtd_re_invalid(uint64_t hi, uint64_t lo) "invalid root entry hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present" -vtd_ce_invalid(uint64_t hi, uint64_t lo) "invalid context entry hi 0x%"PRIx64" lo 0x%"PRIx64 vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16 vtd_iotlb_page_update(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page update sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16 vtd_iotlb_cc_hit(uint8_t bus, uint8_t devfn, uint64_t high, uint64_t low, uint32_t gen) "IOTLB context hit bus 0x%"PRIx8" devfn 0x%"PRIx8" high 0x%"PRIx64" low 0x%"PRIx64" gen %"PRIu32 From ccc23bb08a84f3709b08cc10ffd4e819832fae6d Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 17 Dec 2018 15:31:12 +0800 Subject: [PATCH 20/44] intel_iommu: dma read/write draining support Support DMA read/write draining should be easy for existing VT-d emulation since the emulation itself does not have any request queue there so we don't need to do anything to flush the un-commited queue. What we need to do is to declare the support. These capabilities are required to pass Windows SVVP test program. It is verified that when with parameters "x-aw-bits=48,caching-mode=off" we can pass the Windows SVVP test with this patch applied. Otherwise we'll fail with: IOMMU[0] - DWD (DMA write draining) not supported IOMMU[0] - DWD (DMA read draining) not supported Segment 0 has no DMA remapping capable IOMMU units However since these bits are not declared support for QEMU<=3.1, we'll need a compatibility bit for it and we turn this on by default only for QEMU>=4.0. Please refer to VT-d spec 6.5.4 for more information. CC: Yu Wang Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1654550 Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 4 ++++ hw/i386/intel_iommu_internal.h | 3 +++ include/hw/i386/intel_iommu.h | 1 + include/hw/i386/pc.h | 5 +++++ 4 files changed, 13 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 4806d7edb4..26cc731c7b 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2659,6 +2659,7 @@ static Property vtd_properties[] = { DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits, VTD_HOST_ADDRESS_WIDTH), DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), + DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), DEFINE_PROP_END_OF_LIST(), }; @@ -3147,6 +3148,9 @@ static void vtd_init(IntelIOMMUState *s) s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits); + if (s->dma_drain) { + s->cap |= VTD_CAP_DRAIN; + } if (s->aw_bits == VTD_HOST_AW_48BIT) { s->cap |= VTD_CAP_SAGAW_48bit; } diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index d084099ed9..00e9edbc66 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -203,6 +203,9 @@ #define VTD_CAP_MAMV (VTD_MAMV << 48) #define VTD_CAP_PSI (1ULL << 39) #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35)) +#define VTD_CAP_DRAIN_WRITE (1ULL << 54) +#define VTD_CAP_DRAIN_READ (1ULL << 55) +#define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE) #define VTD_CAP_CM (1ULL << 7) /* Supported Adjusted Guest Address Widths */ diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index ed4e758273..a321cc9691 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -245,6 +245,7 @@ struct IntelIOMMUState { OnOffAuto intr_eim; /* Toggle for EIM cabability */ bool buggy_eim; /* Force buggy EIM unless eim=off */ uint8_t aw_bits; /* Host/IOVA address width (in bits) */ + bool dma_drain; /* Whether DMA r/w draining enabled */ /* * Protects IOMMU states in general. Currently it protects the diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 9d29c4b1df..c7c0c944e8 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -296,6 +296,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); #define PC_COMPAT_3_1 \ HW_COMPAT_3_1 \ + {\ + .driver = "intel-iommu",\ + .property = "dma-drain",\ + .value = "off",\ + }, #define PC_COMPAT_3_0 \ HW_COMPAT_3_0 \ From 4b49b586c452f1df393f2d8ffdfb1516857801d2 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 17 Dec 2018 15:31:13 +0800 Subject: [PATCH 21/44] intel_iommu: remove "x-" prefix for "aw-bits" We're going to have 57bits aw-bits support sooner. It's possibly time to remove the "x-" prefix. Signed-off-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 26cc731c7b..96ef31eb7e 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2656,7 +2656,7 @@ static Property vtd_properties[] = { DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim, ON_OFF_AUTO_AUTO), DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false), - DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits, + DEFINE_PROP_UINT8("aw-bits", IntelIOMMUState, aw_bits, VTD_HOST_ADDRESS_WIDTH), DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), From cd5a5271088863c3eae317e8d214e5b98773dcb4 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 17 Dec 2018 11:48:31 +0100 Subject: [PATCH 22/44] hw: acpi: The RSDP build API can return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For both x86 and ARM architectures, the internal RSDP build API can return void as the current return value is unused. Signed-off-by: Samuel Ortiz Reviewed-by: Igor Mammedov Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Reviewed-by: Thomas Huth Reviewed-by: Andrew Jones Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/arm/virt-acpi-build.c | 4 +--- hw/i386/acpi-build.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 5785fb697c..fcaa350892 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -367,7 +367,7 @@ static void acpi_dsdt_add_power_button(Aml *scope) } /* RSDP */ -static GArray * +static void build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned xsdt_tbl_offset) { AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp); @@ -392,8 +392,6 @@ build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned xsdt_tbl_offset) bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, (char *)rsdp - rsdp_table->data, sizeof *rsdp, (char *)&rsdp->checksum - rsdp_table->data); - - return rsdp_table; } static void diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 236a20eaa8..35f17d0d91 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2547,7 +2547,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker) "IVRS", table_data->len - iommu_start, 1, NULL, NULL); } -static GArray * +static void build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset) { AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp); @@ -2569,8 +2569,6 @@ build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset) bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, (char *)rsdp - rsdp_table->data, sizeof *rsdp, (char *)&rsdp->checksum - rsdp_table->data); - - return rsdp_table; } typedef From 4774866457a675a751c38fb0d9ed09113777ec15 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 17 Dec 2018 11:48:32 +0100 Subject: [PATCH 23/44] hw: arm: acpi: Fix incorrect checksums in RSDP When RSDP table was introduced (d4bec5d87), we calculated only legacy checksum, and that was incorrect as it - specified rev=2 and forgot about extended checksum. - legacy checksum calculated on full table instead of the 1st 20 bytes Fix it by adding extended checksum calculation and using correct size for legacy checksum. While at it use explicit constants to specify sub/full tables sizes instead of relying on AcpiRsdpDescriptor size and fields offsets. The follow up commits will convert this table to build_append_int_noprefix() API, will use constants anyway and remove unused AcpiRsdpDescriptor structure. Based on "[PATCH v5 05/24] hw: acpi: Implement XSDT support for RSDP" by Samuel Ortiz, who did it right in his impl. Fixes: d4bec5d87 ("hw/arm/virt-acpi-build: Generate RSDP table") Signed-off-by: Igor Mammedov CC: Ard Biesheuvel CC: Shannon Zhao Reviewed-by: Andrew Jones Reviewed-by: Samuel Ortiz Signed-off-by: Samuel Ortiz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/arm/virt-acpi-build.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index fcaa350892..0835900052 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -390,8 +390,13 @@ build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned xsdt_tbl_offset) /* Checksum to be filled by Guest linker */ bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, - (char *)rsdp - rsdp_table->data, sizeof *rsdp, + (char *)rsdp - rsdp_table->data, 20 /* ACPI rev 1.0 RSDP size */, (char *)&rsdp->checksum - rsdp_table->data); + + /* Extended checksum to be filled by Guest linker */ + bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, + (char *)rsdp - rsdp_table->data, 36 /* ACPI rev 2.0 RSDP size */, + (char *)&rsdp->extended_checksum - rsdp_table->data); } static void From 3bb3006a632da5b11ec7a154d5b819b8bfab8dab Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 17 Dec 2018 11:48:33 +0100 Subject: [PATCH 24/44] hw: i386: Use correct RSDT length for checksum AcpiRsdpDescriptor describes revision 2 RSDP table so using sizeof(*rsdp) for checksum calculation isn't correct since we are adding extra 16 bytes. But acpi_data_push() zeroes out table, so just by luck we are summing up exta zeros which still yelds correct checksum. Fix it up by explicitly stating table size instead of using pointer arithmetics on stucture. PS: Extra 16 bytes are still wasted, but droping them will break migration for machines older than 2.3 due to size mismatch, for 2.3 and older it's not an issue since they are using resizable memory regions (a1666142d) for ACPI blobs. So keep wasting memory to avoid breaking old machines. Fixes: 72c194f7e (i386: ACPI table generation code from seabios) Signed-off-by: Igor Mammedov Reviewed-by: Samuel Ortiz Signed-off-by: Samuel Ortiz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 35f17d0d91..fb877648ac 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2550,6 +2550,11 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker) static void build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset) { + /* AcpiRsdpDescriptor describes revision 2 RSDP table and as result we + * allocate extra 16 bytes for pc/q35 RSDP rev1 as well. Keep extra 16 bytes + * wasted to make sure we won't breake migration for machine types older + * than 2.3 due to size mismatch. + */ AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp); unsigned rsdt_pa_size = sizeof(rsdp->rsdt_physical_address); unsigned rsdt_pa_offset = @@ -2567,7 +2572,7 @@ build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset) /* Checksum to be filled by Guest linker */ bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, - (char *)rsdp - rsdp_table->data, sizeof *rsdp, + (char *)rsdp - rsdp_table->data, 20 /* ACPI rev 1.0 RSDP size */, (char *)&rsdp->checksum - rsdp_table->data); } From 5c5fce1ab5672a0a9a50c867114481a93298bf23 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 17 Dec 2018 11:48:34 +0100 Subject: [PATCH 25/44] hw: arm: Carry RSDP specific data through AcpiRsdpData That will allow us to generalize the ARM build_rsdp() routine to support both legacy RSDP (The current i386 implementation) and extended RSDP (The ARM implementation). Signed-off-by: Samuel Ortiz Reviewed-by: Igor Mammedov Reviewed-by: Andrew Jones Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/arm/virt-acpi-build.c | 18 +++++++++++++----- include/hw/acpi/acpi-defs.h | 8 ++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 0835900052..ce8bfa5a37 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -368,7 +368,7 @@ static void acpi_dsdt_add_power_button(Aml *scope) /* RSDP */ static void -build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned xsdt_tbl_offset) +build_rsdp(GArray *rsdp_table, BIOSLinker *linker, AcpiRsdpData *rsdp_data) { AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp); unsigned xsdt_pa_size = sizeof(rsdp->xsdt_physical_address); @@ -379,14 +379,14 @@ build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned xsdt_tbl_offset) true /* fseg memory */); memcpy(&rsdp->signature, "RSD PTR ", sizeof(rsdp->signature)); - memcpy(rsdp->oem_id, ACPI_BUILD_APPNAME6, sizeof(rsdp->oem_id)); + memcpy(rsdp->oem_id, rsdp_data->oem_id, sizeof(rsdp->oem_id)); rsdp->length = cpu_to_le32(sizeof(*rsdp)); - rsdp->revision = 0x02; + rsdp->revision = rsdp_data->revision; /* Address to be filled by Guest linker */ bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, xsdt_pa_offset, xsdt_pa_size, - ACPI_BUILD_TABLE_FILE, xsdt_tbl_offset); + ACPI_BUILD_TABLE_FILE, *rsdp_data->xsdt_tbl_offset); /* Checksum to be filled by Guest linker */ bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, @@ -857,7 +857,15 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_xsdt(tables_blob, tables->linker, table_offsets, NULL, NULL); /* RSDP is in FSEG memory, so allocate it separately */ - build_rsdp(tables->rsdp, tables->linker, xsdt); + { + AcpiRsdpData rsdp_data = { + .revision = 2, + .oem_id = ACPI_BUILD_APPNAME6, + .xsdt_tbl_offset = &xsdt, + .rsdt_tbl_offset = NULL, + }; + build_rsdp(tables->rsdp, tables->linker, &rsdp_data); + } /* Cleanup memory that's no longer used. */ g_array_free(table_offsets, true); diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index af8e023968..8425ecb8c6 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -53,6 +53,14 @@ struct AcpiRsdpDescriptor { /* Root System Descriptor Pointer */ } QEMU_PACKED; typedef struct AcpiRsdpDescriptor AcpiRsdpDescriptor; +typedef struct AcpiRsdpData { + uint8_t oem_id[6]; /* OEM identification */ + uint8_t revision; /* Must be 0 for 1.0, 2 for 2.0 */ + + unsigned *rsdt_tbl_offset; + unsigned *xsdt_tbl_offset; +} AcpiRsdpData; + /* Table structure from Linux kernel (the ACPI tables are under the BSD license) */ From 77321eaf15aacdd11cd6dd3efef0c6dd26629e7f Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 17 Dec 2018 11:48:35 +0100 Subject: [PATCH 26/44] hw: arm: Convert the RSDP build to the buid_append_foo() API Instead of filling a mapped and packed C structure field in random order and being careful about endianness and sizes, build_rsdp() now uses build_append_int_noprefix() to compose RSDP table. This makes reviewing and maintaining code easier as this is almost matching 1:1 the ACPI spec itself. Signed-off-by: Samuel Ortiz Reviewed-by: Igor Mammedov Reviewed-by: Andrew Jones Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/arm/virt-acpi-build.c | 42 ++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index ce8bfa5a37..4a6b53fbfc 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -368,35 +368,39 @@ static void acpi_dsdt_add_power_button(Aml *scope) /* RSDP */ static void -build_rsdp(GArray *rsdp_table, BIOSLinker *linker, AcpiRsdpData *rsdp_data) +build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data) { - AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp); - unsigned xsdt_pa_size = sizeof(rsdp->xsdt_physical_address); - unsigned xsdt_pa_offset = - (char *)&rsdp->xsdt_physical_address - rsdp_table->data; + int tbl_off = tbl->len; /* Table offset in the RSDP file */ - bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, rsdp_table, 16, + bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, tbl, 16, true /* fseg memory */); - memcpy(&rsdp->signature, "RSD PTR ", sizeof(rsdp->signature)); - memcpy(rsdp->oem_id, rsdp_data->oem_id, sizeof(rsdp->oem_id)); - rsdp->length = cpu_to_le32(sizeof(*rsdp)); - rsdp->revision = rsdp_data->revision; + g_array_append_vals(tbl, "RSD PTR ", 8); /* Signature */ + build_append_int_noprefix(tbl, 0, 1); /* Checksum */ + g_array_append_vals(tbl, rsdp_data->oem_id, 6); /* OEMID */ + build_append_int_noprefix(tbl, rsdp_data->revision, 1); /* Revision */ + build_append_int_noprefix(tbl, 0, 4); /* RsdtAddress */ + build_append_int_noprefix(tbl, 36, 4); /* Length */ - /* Address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_RSDP_FILE, xsdt_pa_offset, xsdt_pa_size, - ACPI_BUILD_TABLE_FILE, *rsdp_data->xsdt_tbl_offset); + /* XSDT address to be filled by guest linker */ + build_append_int_noprefix(tbl, 0, 8); /* XsdtAddress */ + bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, + tbl_off + 24, 8, + ACPI_BUILD_TABLE_FILE, + *rsdp_data->xsdt_tbl_offset); - /* Checksum to be filled by Guest linker */ + build_append_int_noprefix(tbl, 0, 1); /* Extended Checksum */ + build_append_int_noprefix(tbl, 0, 3); /* Reserved */ + + /* Checksum to be filled by guest linker */ bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, - (char *)rsdp - rsdp_table->data, 20 /* ACPI rev 1.0 RSDP size */, - (char *)&rsdp->checksum - rsdp_table->data); + tbl_off, 20, /* ACPI rev 1.0 RSDP size */ + 8); /* Extended checksum to be filled by Guest linker */ bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, - (char *)rsdp - rsdp_table->data, 36 /* ACPI rev 2.0 RSDP size */, - (char *)&rsdp->extended_checksum - rsdp_table->data); + tbl_off, 36, /* ACPI rev 2.0 RSDP size */ + 32); } static void From f10f38b87695f82ee332fb834e2dc38d579fb2f1 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 17 Dec 2018 11:48:36 +0100 Subject: [PATCH 27/44] hw: arm: Support both legacy and current RSDP build We add the ability to build legacy or current RSDP tables, based on the AcpiRsdpData revision field passed to build_rsdp(). Although arm/virt only uses RSDP v2, adding that capability to build_rsdp will allow us to share the RSDP build code between ARM and x86. Signed-off-by: Samuel Ortiz Reviewed-by: Igor Mammedov Reviewed-by: Andrew Jones Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/arm/virt-acpi-build.c | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 4a6b53fbfc..05f6654371 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -372,6 +372,20 @@ build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data) { int tbl_off = tbl->len; /* Table offset in the RSDP file */ + switch (rsdp_data->revision) { + case 0: + /* With ACPI 1.0, we must have an RSDT pointer */ + g_assert(rsdp_data->rsdt_tbl_offset); + break; + case 2: + /* With ACPI 2.0+, we must have an XSDT pointer */ + g_assert(rsdp_data->xsdt_tbl_offset); + break; + default: + /* Only revisions 0 (ACPI 1.0) and 2 (ACPI 2.0+) are valid for RSDP */ + g_assert_not_reached(); + } + bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, tbl, 16, true /* fseg memory */); @@ -380,10 +394,29 @@ build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data) g_array_append_vals(tbl, rsdp_data->oem_id, 6); /* OEMID */ build_append_int_noprefix(tbl, rsdp_data->revision, 1); /* Revision */ build_append_int_noprefix(tbl, 0, 4); /* RsdtAddress */ + if (rsdp_data->rsdt_tbl_offset) { + /* RSDT address to be filled by guest linker */ + bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, + tbl_off + 16, 4, + ACPI_BUILD_TABLE_FILE, + *rsdp_data->rsdt_tbl_offset); + } + + /* Checksum to be filled by guest linker */ + bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, + tbl_off, 20, /* ACPI rev 1.0 RSDP size */ + 8); + + if (rsdp_data->revision == 0) { + /* ACPI 1.0 RSDP, we're done */ + return; + } + build_append_int_noprefix(tbl, 36, 4); /* Length */ /* XSDT address to be filled by guest linker */ build_append_int_noprefix(tbl, 0, 8); /* XsdtAddress */ + /* We already validated our xsdt pointer */ bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, tbl_off + 24, 8, ACPI_BUILD_TABLE_FILE, @@ -392,11 +425,6 @@ build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data) build_append_int_noprefix(tbl, 0, 1); /* Extended Checksum */ build_append_int_noprefix(tbl, 0, 3); /* Reserved */ - /* Checksum to be filled by guest linker */ - bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, - tbl_off, 20, /* ACPI rev 1.0 RSDP size */ - 8); - /* Extended checksum to be filled by Guest linker */ bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, tbl_off, 36, /* ACPI rev 2.0 RSDP size */ From a46ce1c26d5c555b825e9486da422b1ab8f7faa1 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 17 Dec 2018 16:34:48 +0100 Subject: [PATCH 28/44] hw: acpi: Export and share the ARM RSDP build Now that build_rsdp() supports building both legacy and current RSDP tables, we can move it to a generic folder (hw/acpi) and have the i386 ACPI code reuse it in order to reduce code duplication. Signed-off-by: Samuel Ortiz Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Andrew Jones --- hw/acpi/aml-build.c | 68 +++++++++++++++++++++++++++++++++++++ hw/arm/virt-acpi-build.c | 65 ----------------------------------- hw/i386/acpi-build.c | 49 +++++++++++--------------- include/hw/acpi/aml-build.h | 2 ++ 4 files changed, 89 insertions(+), 95 deletions(-) diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 1e43cd736d..555c24f21d 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -1589,6 +1589,74 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre) g_array_free(tables->vmgenid, mfre); } +/* + * ACPI spec 5.2.5.3 Root System Description Pointer (RSDP). + * (Revision 1.0 or later) + */ +void +build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data) +{ + int tbl_off = tbl->len; /* Table offset in the RSDP file */ + + switch (rsdp_data->revision) { + case 0: + /* With ACPI 1.0, we must have an RSDT pointer */ + g_assert(rsdp_data->rsdt_tbl_offset); + break; + case 2: + /* With ACPI 2.0+, we must have an XSDT pointer */ + g_assert(rsdp_data->xsdt_tbl_offset); + break; + default: + /* Only revisions 0 (ACPI 1.0) and 2 (ACPI 2.0+) are valid for RSDP */ + g_assert_not_reached(); + } + + bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, tbl, 16, + true /* fseg memory */); + + g_array_append_vals(tbl, "RSD PTR ", 8); /* Signature */ + build_append_int_noprefix(tbl, 0, 1); /* Checksum */ + g_array_append_vals(tbl, rsdp_data->oem_id, 6); /* OEMID */ + build_append_int_noprefix(tbl, rsdp_data->revision, 1); /* Revision */ + build_append_int_noprefix(tbl, 0, 4); /* RsdtAddress */ + if (rsdp_data->rsdt_tbl_offset) { + /* RSDT address to be filled by guest linker */ + bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, + tbl_off + 16, 4, + ACPI_BUILD_TABLE_FILE, + *rsdp_data->rsdt_tbl_offset); + } + + /* Checksum to be filled by guest linker */ + bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, + tbl_off, 20, /* ACPI rev 1.0 RSDP size */ + 8); + + if (rsdp_data->revision == 0) { + /* ACPI 1.0 RSDP, we're done */ + return; + } + + build_append_int_noprefix(tbl, 36, 4); /* Length */ + + /* XSDT address to be filled by guest linker */ + build_append_int_noprefix(tbl, 0, 8); /* XsdtAddress */ + /* We already validated our xsdt pointer */ + bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, + tbl_off + 24, 8, + ACPI_BUILD_TABLE_FILE, + *rsdp_data->xsdt_tbl_offset); + + build_append_int_noprefix(tbl, 0, 1); /* Extended Checksum */ + build_append_int_noprefix(tbl, 0, 3); /* Reserved */ + + /* Extended checksum to be filled by Guest linker */ + bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, + tbl_off, 36, /* ACPI rev 2.0 RSDP size */ + 32); +} + /* Build rsdt table */ void build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 05f6654371..95fad6f0ce 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -366,71 +366,6 @@ static void acpi_dsdt_add_power_button(Aml *scope) aml_append(scope, dev); } -/* RSDP */ -static void -build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data) -{ - int tbl_off = tbl->len; /* Table offset in the RSDP file */ - - switch (rsdp_data->revision) { - case 0: - /* With ACPI 1.0, we must have an RSDT pointer */ - g_assert(rsdp_data->rsdt_tbl_offset); - break; - case 2: - /* With ACPI 2.0+, we must have an XSDT pointer */ - g_assert(rsdp_data->xsdt_tbl_offset); - break; - default: - /* Only revisions 0 (ACPI 1.0) and 2 (ACPI 2.0+) are valid for RSDP */ - g_assert_not_reached(); - } - - bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, tbl, 16, - true /* fseg memory */); - - g_array_append_vals(tbl, "RSD PTR ", 8); /* Signature */ - build_append_int_noprefix(tbl, 0, 1); /* Checksum */ - g_array_append_vals(tbl, rsdp_data->oem_id, 6); /* OEMID */ - build_append_int_noprefix(tbl, rsdp_data->revision, 1); /* Revision */ - build_append_int_noprefix(tbl, 0, 4); /* RsdtAddress */ - if (rsdp_data->rsdt_tbl_offset) { - /* RSDT address to be filled by guest linker */ - bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, - tbl_off + 16, 4, - ACPI_BUILD_TABLE_FILE, - *rsdp_data->rsdt_tbl_offset); - } - - /* Checksum to be filled by guest linker */ - bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, - tbl_off, 20, /* ACPI rev 1.0 RSDP size */ - 8); - - if (rsdp_data->revision == 0) { - /* ACPI 1.0 RSDP, we're done */ - return; - } - - build_append_int_noprefix(tbl, 36, 4); /* Length */ - - /* XSDT address to be filled by guest linker */ - build_append_int_noprefix(tbl, 0, 8); /* XsdtAddress */ - /* We already validated our xsdt pointer */ - bios_linker_loader_add_pointer(linker, ACPI_BUILD_RSDP_FILE, - tbl_off + 24, 8, - ACPI_BUILD_TABLE_FILE, - *rsdp_data->xsdt_tbl_offset); - - build_append_int_noprefix(tbl, 0, 1); /* Extended Checksum */ - build_append_int_noprefix(tbl, 0, 3); /* Reserved */ - - /* Extended checksum to be filled by Guest linker */ - bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, - tbl_off, 36, /* ACPI rev 2.0 RSDP size */ - 32); -} - static void build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index fb877648ac..9891b6913b 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2547,35 +2547,6 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker) "IVRS", table_data->len - iommu_start, 1, NULL, NULL); } -static void -build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset) -{ - /* AcpiRsdpDescriptor describes revision 2 RSDP table and as result we - * allocate extra 16 bytes for pc/q35 RSDP rev1 as well. Keep extra 16 bytes - * wasted to make sure we won't breake migration for machine types older - * than 2.3 due to size mismatch. - */ - AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp); - unsigned rsdt_pa_size = sizeof(rsdp->rsdt_physical_address); - unsigned rsdt_pa_offset = - (char *)&rsdp->rsdt_physical_address - rsdp_table->data; - - bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, rsdp_table, 16, - true /* fseg memory */); - - memcpy(&rsdp->signature, "RSD PTR ", 8); - memcpy(rsdp->oem_id, ACPI_BUILD_APPNAME6, 6); - /* Address to be filled by Guest linker */ - bios_linker_loader_add_pointer(linker, - ACPI_BUILD_RSDP_FILE, rsdt_pa_offset, rsdt_pa_size, - ACPI_BUILD_TABLE_FILE, rsdt_tbl_offset); - - /* Checksum to be filled by Guest linker */ - bios_linker_loader_add_checksum(linker, ACPI_BUILD_RSDP_FILE, - (char *)rsdp - rsdp_table->data, 20 /* ACPI rev 1.0 RSDP size */, - (char *)&rsdp->checksum - rsdp_table->data); -} - typedef struct AcpiBuildState { /* Copy of table in RAM (for patching). */ @@ -2732,7 +2703,25 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) slic_oem.id, slic_oem.table_id); /* RSDP is in FSEG memory, so allocate it separately */ - build_rsdp(tables->rsdp, tables->linker, rsdt); + { + AcpiRsdpData rsdp_data = { + .revision = 0, + .oem_id = ACPI_BUILD_APPNAME6, + .xsdt_tbl_offset = NULL, + .rsdt_tbl_offset = &rsdt, + }; + build_rsdp(tables->rsdp, tables->linker, &rsdp_data); + if (!pcmc->rsdp_in_ram) { + /* We used to allocate some extra space for RSDP revision 2 but + * only used the RSDP revision 0 space. The extra bytes were + * zeroed out and not used. + * Here we continue wasting those extra 16 bytes to make sure we + * don't break migration for machine types 2.2 and older due to + * RSDP blob size mismatch. + */ + build_append_int_noprefix(tables->rsdp, 0, 16); + } + } /* We'll expose it all to Guest so we want to reduce * chance of size changes. diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 6c36903c0a..1a563ad756 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -388,6 +388,8 @@ void acpi_add_table(GArray *table_offsets, GArray *table_data); void acpi_build_tables_init(AcpiBuildTables *tables); void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre); void +build_rsdp(GArray *tbl, BIOSLinker *linker, AcpiRsdpData *rsdp_data); +void build_rsdt(GArray *table_data, BIOSLinker *linker, GArray *table_offsets, const char *oem_id, const char *oem_table_id); void From d6caf3631cf561f814aa449bbf9bcb03b86acd2f Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 20 Dec 2018 16:02:55 +0100 Subject: [PATCH 29/44] hw: acpi: Remove AcpiRsdpDescriptor and fix tests The only remaining AcpiRsdpDescriptor users are the ACPI utils for the BIOS table tests. We remove that dependency and can thus remove the structure itself. Signed-off-by: Samuel Ortiz Reviewed-by: Igor Mammedov Reviewed-by: Andrew Jones Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/acpi/acpi-defs.h | 13 ---------- tests/acpi-utils.c | 47 ++++++++++++++++++++++++++++++------- tests/acpi-utils.h | 5 ++-- tests/bios-tables-test.c | 22 +++++++++++++---- tests/vmgenid-test.c | 8 ++++--- 5 files changed, 63 insertions(+), 32 deletions(-) diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index 8425ecb8c6..5021cb9e79 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -40,19 +40,6 @@ enum { ACPI_FADT_F_LOW_POWER_S0_IDLE_CAPABLE, }; -struct AcpiRsdpDescriptor { /* Root System Descriptor Pointer */ - uint64_t signature; /* ACPI signature, contains "RSD PTR " */ - uint8_t checksum; /* To make sum of struct == 0 */ - uint8_t oem_id [6]; /* OEM identification */ - uint8_t revision; /* Must be 0 for 1.0, 2 for 2.0 */ - uint32_t rsdt_physical_address; /* 32-bit physical address of RSDT */ - uint32_t length; /* XSDT Length in bytes including hdr */ - uint64_t xsdt_physical_address; /* 64-bit physical address of XSDT */ - uint8_t extended_checksum; /* Checksum of entire table */ - uint8_t reserved [3]; /* Reserved field must be 0 */ -} QEMU_PACKED; -typedef struct AcpiRsdpDescriptor AcpiRsdpDescriptor; - typedef struct AcpiRsdpData { uint8_t oem_id[6]; /* OEM identification */ uint8_t revision; /* Must be 0 for 1.0, 2 for 2.0 */ diff --git a/tests/acpi-utils.c b/tests/acpi-utils.c index 7b444a7453..17abcc43a4 100644 --- a/tests/acpi-utils.c +++ b/tests/acpi-utils.c @@ -51,15 +51,44 @@ uint32_t acpi_find_rsdp_address(QTestState *qts) return off; } -void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr, - AcpiRsdpDescriptor *rsdp_table) +uint32_t acpi_get_rsdt_address(uint8_t *rsdp_table) { - ACPI_READ_FIELD(qts, rsdp_table->signature, addr); - ACPI_ASSERT_CMP64(rsdp_table->signature, "RSD PTR "); + uint32_t rsdt_physical_address; - ACPI_READ_FIELD(qts, rsdp_table->checksum, addr); - ACPI_READ_ARRAY(qts, rsdp_table->oem_id, addr); - ACPI_READ_FIELD(qts, rsdp_table->revision, addr); - ACPI_READ_FIELD(qts, rsdp_table->rsdt_physical_address, addr); - ACPI_READ_FIELD(qts, rsdp_table->length, addr); + memcpy(&rsdt_physical_address, &rsdp_table[16 /* RsdtAddress offset */], 4); + return le32_to_cpu(rsdt_physical_address); +} + +uint64_t acpi_get_xsdt_address(uint8_t *rsdp_table) +{ + uint64_t xsdt_physical_address; + uint8_t revision = rsdp_table[15 /* Revision offset */]; + + /* We must have revision 2 if we're looking for an XSDT pointer */ + g_assert(revision == 2); + + memcpy(&xsdt_physical_address, &rsdp_table[24 /* XsdtAddress offset */], 8); + return le64_to_cpu(xsdt_physical_address); +} + +void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr, uint8_t *rsdp_table) +{ + uint8_t revision; + + /* Read mandatory revision 0 table data (20 bytes) first */ + qtest_memread(qts, addr, rsdp_table, 20); + revision = rsdp_table[15 /* Revision offset */]; + + switch (revision) { + case 0: /* ACPI 1.0 RSDP */ + break; + case 2: /* ACPI 2.0+ RSDP */ + /* Read the rest of the RSDP table */ + qtest_memread(qts, addr + 20, rsdp_table + 20, 16); + break; + default: + g_assert_not_reached(); + } + + ACPI_ASSERT_CMP64(*((uint64_t *)(rsdp_table)), "RSD PTR "); } diff --git a/tests/acpi-utils.h b/tests/acpi-utils.h index 791bd5410e..c5b0e12aa2 100644 --- a/tests/acpi-utils.h +++ b/tests/acpi-utils.h @@ -74,7 +74,8 @@ typedef struct { uint8_t acpi_calc_checksum(const uint8_t *data, int len); uint32_t acpi_find_rsdp_address(QTestState *qts); -void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr, - AcpiRsdpDescriptor *rsdp_table); +uint32_t acpi_get_rsdt_address(uint8_t *rsdp_table); +uint64_t acpi_get_xsdt_address(uint8_t *rsdp_table); +void acpi_parse_rsdp_table(QTestState *qts, uint32_t addr, uint8_t *rsdp_table); #endif /* TEST_ACPI_UTILS_H */ diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index 2b6004618f..d455b2abfc 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -27,7 +27,7 @@ typedef struct { const char *machine; const char *variant; uint32_t rsdp_addr; - AcpiRsdpDescriptor rsdp_table; + uint8_t rsdp_table[36 /* ACPI 2.0+ RSDP size */]; AcpiRsdtDescriptorRev1 rsdt_table; uint32_t dsdt_addr; uint32_t facs_addr; @@ -86,19 +86,31 @@ static void test_acpi_rsdp_address(test_data *data) static void test_acpi_rsdp_table(test_data *data) { - AcpiRsdpDescriptor *rsdp_table = &data->rsdp_table; + uint8_t *rsdp_table = data->rsdp_table, revision; uint32_t addr = data->rsdp_addr; acpi_parse_rsdp_table(data->qts, addr, rsdp_table); + revision = rsdp_table[15 /* Revision offset */]; - /* rsdp checksum is not for the whole table, but for the first 20 bytes */ - g_assert(!acpi_calc_checksum((uint8_t *)rsdp_table, 20)); + switch (revision) { + case 0: /* ACPI 1.0 RSDP */ + /* With rev 1, checksum is only for the first 20 bytes */ + g_assert(!acpi_calc_checksum(rsdp_table, 20)); + break; + case 2: /* ACPI 2.0+ RSDP */ + /* With revision 2, we have 2 checksums */ + g_assert(!acpi_calc_checksum(rsdp_table, 20)); + g_assert(!acpi_calc_checksum(rsdp_table, 36)); + break; + default: + g_assert_not_reached(); + } } static void test_acpi_rsdt_table(test_data *data) { AcpiRsdtDescriptorRev1 *rsdt_table = &data->rsdt_table; - uint32_t addr = le32_to_cpu(data->rsdp_table.rsdt_physical_address); + uint32_t addr = acpi_get_rsdt_address(data->rsdp_table); uint32_t *tables; int tables_nr; uint8_t checksum; diff --git a/tests/vmgenid-test.c b/tests/vmgenid-test.c index 84449ce8ae..1c1d435bbd 100644 --- a/tests/vmgenid-test.c +++ b/tests/vmgenid-test.c @@ -35,7 +35,7 @@ static uint32_t acpi_find_vgia(QTestState *qts) { uint32_t rsdp_offset; uint32_t guid_offset = 0; - AcpiRsdpDescriptor rsdp_table; + uint8_t rsdp_table[36 /* ACPI 2.0+ RSDP size */]; uint32_t rsdt, rsdt_table_length; AcpiRsdtDescriptorRev1 rsdt_table; size_t tables_nr; @@ -52,9 +52,11 @@ static uint32_t acpi_find_vgia(QTestState *qts) g_assert_cmphex(rsdp_offset, <, RSDP_ADDR_INVALID); - acpi_parse_rsdp_table(qts, rsdp_offset, &rsdp_table); + acpi_parse_rsdp_table(qts, rsdp_offset, rsdp_table); + + rsdt = acpi_get_rsdt_address(rsdp_table); + g_assert(rsdt); - rsdt = le32_to_cpu(rsdp_table.rsdt_physical_address); /* read the header */ ACPI_READ_TABLE_HEADER(qts, &rsdt_table, rsdt); ACPI_ASSERT_CMP(rsdt_table.signature, "RSDT"); From cc425b5ddfe8f8474814f369409e637c36898542 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 17 Dec 2018 17:57:37 +0100 Subject: [PATCH 30/44] hw/i386: Remove deprecated machines pc-0.10 and pc-0.11 They've been deprecated for two releases and nobody complained that they are still required anymore, so it's time to remove these now. And while we're at it, mark the other remaining old 0.x machine types as deprecated (since they can not properly be used for live-migration anyway). Signed-off-by: Thomas Huth Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Eduardo Habkost --- hw/i386/pc_piix.c | 70 ++----------------------------------------- qemu-deprecated.texi | 2 +- tests/cpu-plug-test.c | 4 +-- 3 files changed, 4 insertions(+), 72 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index e000c7511a..7f1cb527b5 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -368,7 +368,7 @@ static void pc_compat_1_2(MachineState *machine) x86_cpu_change_kvm_default("kvm-pv-eoi", NULL); } -/* PC compat function for pc-0.10 to pc-0.13 */ +/* PC compat function for pc-0.12 and pc-0.13 */ static void pc_compat_0_13(MachineState *machine) { pc_compat_1_2(machine); @@ -834,6 +834,7 @@ static void pc_i440fx_0_15_machine_options(MachineClass *m) { pc_i440fx_1_0_machine_options(m); m->hw_version = "0.15"; + m->deprecation_reason = "use a newer machine type instead"; SET_MACHINE_COMPAT(m, PC_COMPAT_0_15); } @@ -951,73 +952,6 @@ static void pc_i440fx_0_12_machine_options(MachineClass *m) DEFINE_I440FX_MACHINE(v0_12, "pc-0.12", pc_compat_0_13, pc_i440fx_0_12_machine_options); - -#define PC_COMPAT_0_11 \ - PC_CPU_MODEL_IDS("0.11") \ - {\ - .driver = "virtio-blk-pci",\ - .property = "vectors",\ - .value = stringify(0),\ - },{\ - .driver = TYPE_PCI_DEVICE,\ - .property = "rombar",\ - .value = stringify(0),\ - },{\ - .driver = "ide-drive",\ - .property = "ver",\ - .value = "0.11",\ - },{\ - .driver = "scsi-disk",\ - .property = "ver",\ - .value = "0.11",\ - }, - -static void pc_i440fx_0_11_machine_options(MachineClass *m) -{ - pc_i440fx_0_12_machine_options(m); - m->hw_version = "0.11"; - m->deprecation_reason = "use a newer machine type instead"; - SET_MACHINE_COMPAT(m, PC_COMPAT_0_11); -} - -DEFINE_I440FX_MACHINE(v0_11, "pc-0.11", pc_compat_0_13, - pc_i440fx_0_11_machine_options); - - -#define PC_COMPAT_0_10 \ - PC_CPU_MODEL_IDS("0.10") \ - {\ - .driver = "virtio-blk-pci",\ - .property = "class",\ - .value = stringify(PCI_CLASS_STORAGE_OTHER),\ - },{\ - .driver = "virtio-serial-pci",\ - .property = "class",\ - .value = stringify(PCI_CLASS_DISPLAY_OTHER),\ - },{\ - .driver = "virtio-net-pci",\ - .property = "vectors",\ - .value = stringify(0),\ - },{\ - .driver = "ide-drive",\ - .property = "ver",\ - .value = "0.10",\ - },{\ - .driver = "scsi-disk",\ - .property = "ver",\ - .value = "0.10",\ - }, - -static void pc_i440fx_0_10_machine_options(MachineClass *m) -{ - pc_i440fx_0_11_machine_options(m); - m->hw_version = "0.10"; - SET_MACHINE_COMPAT(m, PC_COMPAT_0_10); -} - -DEFINE_I440FX_MACHINE(v0_10, "pc-0.10", pc_compat_0_13, - pc_i440fx_0_10_machine_options); - typedef struct { uint16_t gpu_device_id; uint16_t pch_device_id; diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi index e362d37225..c3735b698e 100644 --- a/qemu-deprecated.texi +++ b/qemu-deprecated.texi @@ -134,7 +134,7 @@ their usecases. @section System emulator machines -@subsection pc-0.10 and pc-0.11 (since 3.0) +@subsection pc-0.12, pc-0.13, pc-0.14 and pc-0.15 (since 4.0) These machine types are very old and likely can not be used for live migration from old QEMU versions anymore. A newer machine type should be used instead. diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c index f4a677d238..668f00144e 100644 --- a/tests/cpu-plug-test.c +++ b/tests/cpu-plug-test.c @@ -157,9 +157,7 @@ static void add_pc_test_case(const char *mname) (strcmp(mname, "pc-0.15") == 0) || (strcmp(mname, "pc-0.14") == 0) || (strcmp(mname, "pc-0.13") == 0) || - (strcmp(mname, "pc-0.12") == 0) || - (strcmp(mname, "pc-0.11") == 0) || - (strcmp(mname, "pc-0.10") == 0)) { + (strcmp(mname, "pc-0.12") == 0)) { path = g_strdup_printf("cpu-plug/%s/init/%ux%ux%u&maxcpus=%u", mname, data->sockets, data->cores, data->threads, data->maxcpus); From 5571727a6368bd23d751336e261eaf1b32dd472d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 12 Dec 2018 10:16:13 +0100 Subject: [PATCH 31/44] pci/pcie: rename hotplug handler callbacks The callbacks are also called for cold plugged devices. Drop the "hot" to better match the actual callback names. While at it, also rename pcie_cap_slot_hotplug_common() to pcie_cap_slot_plug_common(). Reviewed-by: David Gibson Reviewed-by: Igor Mammedov Signed-off-by: David Hildenbrand Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 17 ++++++++--------- hw/pci/pcie_port.c | 4 ++-- include/hw/pci/pcie.h | 8 ++++---- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index d91a615193..ec3b1145f3 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -390,9 +390,8 @@ static void pcie_cap_slot_event(PCIDevice *dev, PCIExpressHotPlugEvent event) hotplug_event_notify(dev); } -static void pcie_cap_slot_hotplug_common(PCIDevice *hotplug_dev, - DeviceState *dev, - uint8_t **exp_cap, Error **errp) +static void pcie_cap_slot_plug_common(PCIDevice *hotplug_dev, DeviceState *dev, + uint8_t **exp_cap, Error **errp) { *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap; uint16_t sltsta = pci_get_word(*exp_cap + PCI_EXP_SLTSTA); @@ -406,13 +405,13 @@ static void pcie_cap_slot_hotplug_common(PCIDevice *hotplug_dev, } } -void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp) +void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) { uint8_t *exp_cap; PCIDevice *pci_dev = PCI_DEVICE(dev); - pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); + pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); /* Don't send event when device is enabled during qemu machine creation: * it is present on boot, no hotplug event is necessary. We do send an @@ -448,14 +447,14 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) object_unparent(OBJECT(dev)); } -void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { uint8_t *exp_cap; PCIDevice *pci_dev = PCI_DEVICE(dev); PCIBus *bus = pci_get_bus(pci_dev); - pcie_cap_slot_hotplug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); + pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, &exp_cap, errp); /* In case user cancel the operation of multi-function hot-add, * remove the function that is unexposed to guest individually, diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index 6432b9ac1f..73e81e5847 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -154,8 +154,8 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data) HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); dc->props = pcie_slot_props; - hc->plug = pcie_cap_slot_hotplug_cb; - hc->unplug_request = pcie_cap_slot_hot_unplug_request_cb; + hc->plug = pcie_cap_slot_plug_cb; + hc->unplug_request = pcie_cap_slot_unplug_request_cb; } static const TypeInfo pcie_slot_type_info = { diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index 1976909ab4..d51ca23f07 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -132,8 +132,8 @@ void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn); void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num); void pcie_ats_init(PCIDevice *dev, uint16_t offset); -void pcie_cap_slot_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp); -void pcie_cap_slot_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp); +void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); #endif /* QEMU_PCIE_H */ From 851fedfbc514f33d867718febf45bf90c9bed341 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 12 Dec 2018 10:16:14 +0100 Subject: [PATCH 32/44] pci/shpc: rename hotplug handler callbacks The callbacks are also called for cold plugged devices. Drop the "hot" to better match the actual callback names. While at it, also rename shpc_device_hotplug_common() to shpc_device_plug_common(). Reviewed-by: David Gibson Reviewed-by: Igor Mammedov Signed-off-by: David Hildenbrand Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/pci_bridge_dev.c | 17 ++++++++--------- hw/pci-bridge/pcie_pci_bridge.c | 17 ++++++++--------- hw/pci/shpc.c | 14 +++++++------- include/hw/pci/shpc.h | 8 ++++---- 4 files changed, 27 insertions(+), 29 deletions(-) diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c index 97a8e8b6a4..e1df9a52ac 100644 --- a/hw/pci-bridge/pci_bridge_dev.c +++ b/hw/pci-bridge/pci_bridge_dev.c @@ -206,8 +206,8 @@ static const VMStateDescription pci_bridge_dev_vmstate = { } }; -static void pci_bridge_dev_hotplug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +static void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); @@ -216,12 +216,11 @@ static void pci_bridge_dev_hotplug_cb(HotplugHandler *hotplug_dev, "this %s", TYPE_PCI_BRIDGE_DEV); return; } - shpc_device_hotplug_cb(hotplug_dev, dev, errp); + shpc_device_plug_cb(hotplug_dev, dev, errp); } -static void pci_bridge_dev_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, - Error **errp) +static void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); @@ -230,7 +229,7 @@ static void pci_bridge_dev_hot_unplug_request_cb(HotplugHandler *hotplug_dev, "this %s", TYPE_PCI_BRIDGE_DEV); return; } - shpc_device_hot_unplug_request_cb(hotplug_dev, dev, errp); + shpc_device_unplug_request_cb(hotplug_dev, dev, errp); } static void pci_bridge_dev_class_init(ObjectClass *klass, void *data) @@ -251,8 +250,8 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data) dc->props = pci_bridge_dev_properties; dc->vmsd = &pci_bridge_dev_vmstate; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - hc->plug = pci_bridge_dev_hotplug_cb; - hc->unplug_request = pci_bridge_dev_hot_unplug_request_cb; + hc->plug = pci_bridge_dev_plug_cb; + hc->unplug_request = pci_bridge_dev_unplug_request_cb; } static const TypeInfo pci_bridge_dev_info = { diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c index 04cf5a6a92..c634353b06 100644 --- a/hw/pci-bridge/pcie_pci_bridge.c +++ b/hw/pci-bridge/pcie_pci_bridge.c @@ -137,8 +137,8 @@ static const VMStateDescription pcie_pci_bridge_dev_vmstate = { } }; -static void pcie_pci_bridge_hotplug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +static void pcie_pci_bridge_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); @@ -147,12 +147,11 @@ static void pcie_pci_bridge_hotplug_cb(HotplugHandler *hotplug_dev, "this %s", TYPE_PCIE_PCI_BRIDGE_DEV); return; } - shpc_device_hotplug_cb(hotplug_dev, dev, errp); + shpc_device_plug_cb(hotplug_dev, dev, errp); } -static void pcie_pci_bridge_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, - Error **errp) +static void pcie_pci_bridge_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); @@ -161,7 +160,7 @@ static void pcie_pci_bridge_hot_unplug_request_cb(HotplugHandler *hotplug_dev, "this %s", TYPE_PCIE_PCI_BRIDGE_DEV); return; } - shpc_device_hot_unplug_request_cb(hotplug_dev, dev, errp); + shpc_device_unplug_request_cb(hotplug_dev, dev, errp); } static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data) @@ -180,8 +179,8 @@ static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data) dc->props = pcie_pci_bridge_dev_properties; dc->reset = &pcie_pci_bridge_reset; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - hc->plug = pcie_pci_bridge_hotplug_cb; - hc->unplug_request = pcie_pci_bridge_hot_unplug_request_cb; + hc->plug = pcie_pci_bridge_plug_cb; + hc->unplug_request = pcie_pci_bridge_unplug_request_cb; } static const TypeInfo pcie_pci_bridge_info = { diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 96a43d2f70..7851bfa8ad 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -482,8 +482,8 @@ static const MemoryRegionOps shpc_mmio_ops = { .max_access_size = 4, }, }; -static void shpc_device_hotplug_common(PCIDevice *affected_dev, int *slot, - SHPCDevice *shpc, Error **errp) +static void shpc_device_plug_common(PCIDevice *affected_dev, int *slot, + SHPCDevice *shpc, Error **errp) { int pci_slot = PCI_SLOT(affected_dev->devfn); *slot = SHPC_PCI_TO_IDX(pci_slot); @@ -497,7 +497,7 @@ static void shpc_device_hotplug_common(PCIDevice *affected_dev, int *slot, } } -void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, +void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { Error *local_err = NULL; @@ -505,7 +505,7 @@ void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, SHPCDevice *shpc = pci_hotplug_dev->shpc; int slot; - shpc_device_hotplug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); + shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -540,8 +540,8 @@ void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, shpc_interrupt_update(pci_hotplug_dev); } -void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { Error *local_err = NULL; PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); @@ -550,7 +550,7 @@ void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev, uint8_t led; int slot; - shpc_device_hotplug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); + shpc_device_plug_common(PCI_DEVICE(dev), &slot, shpc, &local_err); if (local_err) { error_propagate(errp, local_err); return; diff --git a/include/hw/pci/shpc.h b/include/hw/pci/shpc.h index ee19fecf61..71293aca58 100644 --- a/include/hw/pci/shpc.h +++ b/include/hw/pci/shpc.h @@ -45,10 +45,10 @@ void shpc_free(PCIDevice *dev); void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len); -void shpc_device_hotplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, - Error **errp); -void shpc_device_hot_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp); +void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); extern VMStateInfo shpc_vmstate_info; #define SHPC_VMSTATE(_field, _type, _test) \ From fa2a7751172b6228706decfbdddb6eac39052ab1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 12 Dec 2018 10:16:15 +0100 Subject: [PATCH 33/44] s390x/pci: rename hotplug handler callbacks The callbacks are also called for cold plugged devices. Drop the "hot" to better match the actual callback names. Reviewed-by: David Gibson Reviewed-by: Cornelia Huck Reviewed-by: Igor Mammedov Reviewed-by: Pierre Morel Signed-off-by: David Hildenbrand Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/s390x/s390-pci-bus.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index f7458445c0..15759b6514 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -823,8 +823,8 @@ static bool s390_pci_alloc_idx(S390pciState *s, S390PCIBusDevice *pbdev) return true; } -static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) { PCIDevice *pdev = NULL; S390PCIBusDevice *pbdev = NULL; @@ -932,8 +932,8 @@ static void s390_pcihost_timer_cb(void *opaque) qdev_unplug(DEVICE(pbdev), NULL); } -static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) { PCIDevice *pci_dev = NULL; PCIBus *bus; @@ -1041,8 +1041,8 @@ static void s390_pcihost_class_init(ObjectClass *klass, void *data) dc->reset = s390_pcihost_reset; dc->realize = s390_pcihost_realize; - hc->plug = s390_pcihost_hot_plug; - hc->unplug = s390_pcihost_hot_unplug; + hc->plug = s390_pcihost_plug; + hc->unplug = s390_pcihost_unplug; msi_nonbroken = true; } From ec266f408882fd38475f72c4e864ed576228643b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 12 Dec 2018 10:16:17 +0100 Subject: [PATCH 34/44] pci/pcihp: perform check for bus capability in pre_plug handler Perform the check in the pre_plug handler. In addition, we need the capability only if the device is actually hotplugged (and not created during machine initialization). This is a preparation for coldplugging pci devices via that hotplug handler. Reviewed-by: Igor Mammedov Signed-off-by: David Hildenbrand Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/pcihp.c | 21 +++++++++++++++------ hw/acpi/piix4.c | 16 ++++++++++++++-- include/hw/acpi/pcihp.h | 2 ++ 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 80d42e12ff..5e7cef173c 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -217,17 +217,24 @@ void acpi_pcihp_reset(AcpiPciHpState *s) acpi_pcihp_update(s); } +void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + /* Only hotplugged devices need the hotplug capability. */ + if (dev->hotplugged && + acpi_pcihp_get_bsel(pci_get_bus(PCI_DEVICE(dev))) < 0) { + error_setg(errp, "Unsupported bus. Bus doesn't have property '" + ACPI_PCIHP_PROP_BSEL "' set"); + return; + } +} + void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, DeviceState *dev, Error **errp) { PCIDevice *pdev = PCI_DEVICE(dev); int slot = PCI_SLOT(pdev->devfn); - int bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev)); - if (bsel < 0) { - error_setg(errp, "Unsupported bus. Bus doesn't have property '" - ACPI_PCIHP_PROP_BSEL "' set"); - return; - } + int bsel; /* Don't send event when device is enabled during qemu machine creation: * it is present on boot, no hotplug event is necessary. We do send an @@ -236,6 +243,8 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, return; } + bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev)); + g_assert(bsel >= 0); s->acpi_pcihp_pci_status[bsel].up |= (1U << slot); acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); } diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 2f4dd03b83..f68e62d36c 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -371,6 +371,18 @@ static void piix4_pm_powerdown_req(Notifier *n, void *opaque) acpi_pm1_evt_power_down(&s->ar); } +static void piix4_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_pre_plug_cb(hotplug_dev, dev, errp); + } else if (!object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && + !object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + error_setg(errp, "acpi: device pre plug request for not supported" + " device type: %s", object_get_typename(OBJECT(dev))); + } +} + static void piix4_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -393,8 +405,7 @@ static void piix4_device_plug_cb(HotplugHandler *hotplug_dev, acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp); } } else { - error_setg(errp, "acpi: device plug request for not supported device" - " type: %s", object_get_typename(OBJECT(dev))); + g_assert_not_reached(); } } @@ -703,6 +714,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data) */ dc->user_creatable = false; dc->hotpluggable = false; + hc->pre_plug = piix4_device_pre_plug_cb; hc->plug = piix4_device_plug_cb; hc->unplug_request = piix4_device_unplug_request_cb; hc->unplug = piix4_device_unplug_cb; diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index 8a65f99fc8..ce31625850 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -56,6 +56,8 @@ typedef struct AcpiPciHpState { void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root, MemoryRegion *address_space_io, bool bridges_enabled); +void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, DeviceState *dev, Error **errp); void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, From 3e5209265798af396b9f20bac58f203a743cc3f4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 12 Dec 2018 10:16:18 +0100 Subject: [PATCH 35/44] pci/pcihp: overwrite hotplug handler recursively from the start For now, the hotplug handler is not called for devices that are being cold plugged. The hotplug handler is setup when the machine initialization is fully done. Only bridges that were cold plugged are considered. Set the hotplug handler for the root piix bus directly when realizing. Overwrite the hotplug handler of bridges when coldplugging them. This will now make sure that the ACPI PCI hotplug handler is also called for cold plugged devices (also on bridges) but not for bridges that were hotplugged (keeping the current behavior). Reviewed-by: Igor Mammedov Signed-off-by: David Hildenbrand Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/pcihp.c | 15 +++++++++++++++ hw/acpi/piix4.c | 16 +--------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 5e7cef173c..05e3f8d11e 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -30,6 +30,7 @@ #include "hw/hw.h" #include "hw/i386/pc.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_bridge.h" #include "hw/acpi/acpi.h" #include "sysemu/sysemu.h" #include "exec/address-spaces.h" @@ -240,6 +241,20 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, * it is present on boot, no hotplug event is necessary. We do send an * event when the device is disabled later. */ if (!dev->hotplugged) { + /* + * Overwrite the default hotplug handler with the ACPI PCI one + * for cold plugged bridges only. + */ + if (!s->legacy_piix && + object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { + PCIBus *sec = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + + qbus_set_hotplug_handler(BUS(sec), DEVICE(hotplug_dev), + &error_abort); + /* We don't have to overwrite any other hotplug handler yet */ + assert(QLIST_EMPTY(&sec->child)); + } + return; } diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index f68e62d36c..07dfeb0e3d 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -447,15 +447,6 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev, } } -static void piix4_update_bus_hotplug(PCIBus *pci_bus, void *opaque) -{ - PIIX4PMState *s = opaque; - - /* pci_bus cannot outlive PIIX4PMState, because /machine keeps it alive - * and it's not hot-unpluggable */ - qbus_set_hotplug_handler(BUS(pci_bus), DEVICE(s), &error_abort); -} - static void piix4_pm_machine_ready(Notifier *n, void *opaque) { PIIX4PMState *s = container_of(n, PIIX4PMState, machine_ready); @@ -469,12 +460,6 @@ static void piix4_pm_machine_ready(Notifier *n, void *opaque) pci_conf[0x63] = 0x60; pci_conf[0x67] = (memory_region_present(io_as, 0x3f8) ? 0x08 : 0) | (memory_region_present(io_as, 0x2f8) ? 0x90 : 0); - - if (s->use_acpi_pci_hotplug) { - pci_for_each_bus(pci_get_bus(d), piix4_update_bus_hotplug, s); - } else { - piix4_update_bus_hotplug(pci_get_bus(d), s); - } } static void piix4_pm_add_propeties(PIIX4PMState *s) @@ -548,6 +533,7 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp) piix4_acpi_system_hot_add_init(pci_address_space_io(dev), pci_get_bus(dev), s); + qbus_set_hotplug_handler(BUS(pci_get_bus(dev)), DEVICE(s), &error_abort); piix4_pm_add_propeties(s); } From c97adf3ccfbfbe6885fd9de7293162489d293d44 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 12 Dec 2018 10:16:19 +0100 Subject: [PATCH 36/44] pci/pcihp: perform unplug via the hotplug handler Introduce and use the "unplug" callback. This is a preparation for multi-stage hotplug handlers, whereby the bus hotplug handler is overwritten by the machine hotplug handler. This handler will then pass control to the bus hotplug handler. So to get this running cleanly, we also have to make sure to go via the hotplug handler chain when actually unplugging a device after an unplug request. Lookup the hotplug handler and call "unplug". Reviewed-by: Igor Mammedov Signed-off-by: David Hildenbrand Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/pcihp.c | 11 ++++++++++- hw/acpi/piix4.c | 7 +++++-- include/hw/acpi/pcihp.h | 3 +++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 05e3f8d11e..7bc7a72340 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -154,6 +154,7 @@ static bool acpi_pcihp_pc_no_hotplug(AcpiPciHpState *s, PCIDevice *dev) static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slots) { + HotplugHandler *hotplug_ctrl; BusChild *kid, *next; int slot = ctz32(slots); PCIBus *bus = acpi_pcihp_find_hotplug_bus(s, bsel); @@ -171,7 +172,8 @@ static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slo PCIDevice *dev = PCI_DEVICE(qdev); if (PCI_SLOT(dev->devfn) == slot) { if (!acpi_pcihp_pc_no_hotplug(s, dev)) { - object_unparent(OBJECT(qdev)); + hotplug_ctrl = qdev_get_hotplug_handler(qdev); + hotplug_handler_unplug(hotplug_ctrl, qdev, &error_abort); } } } @@ -266,6 +268,13 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, DeviceState *dev, Error **errp) +{ + object_unparent(OBJECT(dev)); +} + +void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, + AcpiPciHpState *s, DeviceState *dev, + Error **errp) { PCIDevice *pdev = PCI_DEVICE(dev); int slot = PCI_SLOT(pdev->devfn); diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 07dfeb0e3d..88f9a9ec09 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -419,8 +419,8 @@ static void piix4_device_unplug_request_cb(HotplugHandler *hotplug_dev, acpi_memory_unplug_request_cb(hotplug_dev, &s->acpi_memory_hotplug, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { - acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, - errp); + acpi_pcihp_device_unplug_request_cb(hotplug_dev, &s->acpi_pci_hotplug, + dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && !s->cpu_hotplug_legacy) { acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp); @@ -438,6 +438,9 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev, if (s->acpi_memory_hotplug.is_enabled && object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { acpi_memory_unplug_cb(&s->acpi_memory_hotplug, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_unplug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, + errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && !s->cpu_hotplug_legacy) { acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp); diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index ce31625850..8bc4a4c01d 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -62,6 +62,9 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, DeviceState *dev, Error **errp); void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, DeviceState *dev, Error **errp); +void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, + AcpiPciHpState *s, DeviceState *dev, + Error **errp); /* Called on reset */ void acpi_pcihp_reset(AcpiPciHpState *s); From a1952d01e731856f5a9654508d6c9658796e40f7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 12 Dec 2018 10:16:20 +0100 Subject: [PATCH 37/44] pci/pcie: perform unplug via the hotplug handler Introduce and use the "unplug" callback. This is a preparation for multi-stage hotplug handlers, whereby the bus hotplug handler is overwritten by the machine hotplug handler. This handler will then pass control to the bus hotplug handler. So to get this running cleanly, we also have to make sure to go via the hotplug handler chain when actually unplugging a device after an unplug request. Lookup the hotplug handler and call "unplug". Reviewed-by: David Gibson Reviewed-by: Igor Mammedov Signed-off-by: David Hildenbrand Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 10 +++++++++- hw/pci/pcie_port.c | 1 + include/hw/pci/pcie.h | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index ec3b1145f3..2d3d8a047b 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -442,11 +442,19 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } } -static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) +void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) { object_unparent(OBJECT(dev)); } +static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(dev)); + + hotplug_handler_unplug(hotplug_ctrl, DEVICE(dev), &error_abort); +} + void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index 73e81e5847..bc07abc31b 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -155,6 +155,7 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data) dc->props = pcie_slot_props; hc->plug = pcie_cap_slot_plug_cb; + hc->unplug = pcie_cap_slot_unplug_cb; hc->unplug_request = pcie_cap_slot_unplug_request_cb; } diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index d51ca23f07..cd318646a2 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -134,6 +134,8 @@ void pcie_ats_init(PCIDevice *dev, uint16_t offset); void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); +void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); #endif /* QEMU_PCIE_H */ From 62b765639691de36476c471117e96184a2a3c7a6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 12 Dec 2018 10:16:21 +0100 Subject: [PATCH 38/44] pci: Reuse pci-bridge hotplug handler handlers for pcie-pci-bridge These functions are essentially the same, we only have to use object_get_typename() for reporting errors. So let's share the implementation of hotplug handler callbacks. Suggested-by: Igor Mammedov Reviewed-by: Igor Mammedov Signed-off-by: David Hildenbrand Reviewed-by: David Gibson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/pci_bridge_dev.c | 12 ++++++------ hw/pci-bridge/pcie_pci_bridge.c | 30 ++---------------------------- include/hw/pci/pci_bridge.h | 4 ++++ 3 files changed, 12 insertions(+), 34 deletions(-) diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c index e1df9a52ac..fa0be13ac4 100644 --- a/hw/pci-bridge/pci_bridge_dev.c +++ b/hw/pci-bridge/pci_bridge_dev.c @@ -206,27 +206,27 @@ static const VMStateDescription pci_bridge_dev_vmstate = { } }; -static void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) { PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); if (!shpc_present(pci_hotplug_dev)) { error_setg(errp, "standard hotplug controller has been disabled for " - "this %s", TYPE_PCI_BRIDGE_DEV); + "this %s", object_get_typename(OBJECT(hotplug_dev))); return; } shpc_device_plug_cb(hotplug_dev, dev, errp); } -static void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) +void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) { PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); if (!shpc_present(pci_hotplug_dev)) { error_setg(errp, "standard hotplug controller has been disabled for " - "this %s", TYPE_PCI_BRIDGE_DEV); + "this %s", object_get_typename(OBJECT(hotplug_dev))); return; } shpc_device_unplug_request_cb(hotplug_dev, dev, errp); diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c index c634353b06..0ffea680d5 100644 --- a/hw/pci-bridge/pcie_pci_bridge.c +++ b/hw/pci-bridge/pcie_pci_bridge.c @@ -137,32 +137,6 @@ static const VMStateDescription pcie_pci_bridge_dev_vmstate = { } }; -static void pcie_pci_bridge_plug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); - - if (!shpc_present(pci_hotplug_dev)) { - error_setg(errp, "standard hotplug controller has been disabled for " - "this %s", TYPE_PCIE_PCI_BRIDGE_DEV); - return; - } - shpc_device_plug_cb(hotplug_dev, dev, errp); -} - -static void pcie_pci_bridge_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); - - if (!shpc_present(pci_hotplug_dev)) { - error_setg(errp, "standard hotplug controller has been disabled for " - "this %s", TYPE_PCIE_PCI_BRIDGE_DEV); - return; - } - shpc_device_unplug_request_cb(hotplug_dev, dev, errp); -} - static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data) { PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); @@ -179,8 +153,8 @@ static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data) dc->props = pcie_pci_bridge_dev_properties; dc->reset = &pcie_pci_bridge_reset; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); - hc->plug = pcie_pci_bridge_plug_cb; - hc->unplug_request = pcie_pci_bridge_unplug_request_cb; + hc->plug = pci_bridge_dev_plug_cb; + hc->unplug_request = pci_bridge_dev_unplug_request_cb; } static const TypeInfo pcie_pci_bridge_info = { diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index cdff7edfd1..6e37c7551a 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -99,6 +99,10 @@ void pci_bridge_reset(DeviceState *qdev); void pci_bridge_initfn(PCIDevice *pci_dev, const char *typename); void pci_bridge_exitfn(PCIDevice *pci_dev); +void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); +void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); /* * before qdev initialization(qdev_init()), this function sets bus_name and From 8f560cdce42562a3c41d61c896667cf77c76d51f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 12 Dec 2018 10:16:22 +0100 Subject: [PATCH 39/44] pci/shpc: perform unplug via the hotplug handler Introduce and use the "unplug" callback. This is a preparation for multi-stage hotplug handlers, whereby the bus hotplug handler is overwritten by the machine hotplug handler. This handler will then pass control to the bus hotplug handler. So to get this running cleanly, we also have to make sure to go via the hotplug handler chain when actually unplugging a device after an unplug request. Lookup the hotplug handler and call "unplug". Reviewed-by: David Gibson Signed-off-by: David Hildenbrand Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/pci_bridge_dev.c | 10 ++++++++++ hw/pci-bridge/pcie_pci_bridge.c | 1 + hw/pci/shpc.c | 11 ++++++++++- include/hw/pci/pci_bridge.h | 2 ++ include/hw/pci/shpc.h | 2 ++ 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c index fa0be13ac4..ff6b8323da 100644 --- a/hw/pci-bridge/pci_bridge_dev.c +++ b/hw/pci-bridge/pci_bridge_dev.c @@ -219,6 +219,15 @@ void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, shpc_device_plug_cb(hotplug_dev, dev, errp); } +void pci_bridge_dev_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + PCIDevice *pci_hotplug_dev = PCI_DEVICE(hotplug_dev); + + g_assert(shpc_present(pci_hotplug_dev)); + shpc_device_unplug_cb(hotplug_dev, dev, errp); +} + void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -251,6 +260,7 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data) dc->vmsd = &pci_bridge_dev_vmstate; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); hc->plug = pci_bridge_dev_plug_cb; + hc->unplug = pci_bridge_dev_unplug_cb; hc->unplug_request = pci_bridge_dev_unplug_request_cb; } diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c index 0ffea680d5..d491b40d04 100644 --- a/hw/pci-bridge/pcie_pci_bridge.c +++ b/hw/pci-bridge/pcie_pci_bridge.c @@ -154,6 +154,7 @@ static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data) dc->reset = &pcie_pci_bridge_reset; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); hc->plug = pci_bridge_dev_plug_cb; + hc->unplug = pci_bridge_dev_unplug_cb; hc->unplug_request = pci_bridge_dev_unplug_request_cb; } diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 7851bfa8ad..45053b39b9 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -238,6 +238,7 @@ static void shpc_invalid_command(SHPCDevice *shpc) static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot) { + HotplugHandler *hotplug_ctrl; int devfn; int pci_slot = SHPC_IDX_TO_PCI(slot); for (devfn = PCI_DEVFN(pci_slot, 0); @@ -245,7 +246,9 @@ static void shpc_free_devices_in_slot(SHPCDevice *shpc, int slot) ++devfn) { PCIDevice *affected_dev = shpc->sec_bus->devices[devfn]; if (affected_dev) { - object_unparent(OBJECT(affected_dev)); + hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(affected_dev)); + hotplug_handler_unplug(hotplug_ctrl, DEVICE(affected_dev), + &error_abort); } } } @@ -540,6 +543,12 @@ void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, shpc_interrupt_update(pci_hotplug_dev); } +void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + object_unparent(OBJECT(dev)); +} + void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index 6e37c7551a..ba488818d2 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -101,6 +101,8 @@ void pci_bridge_exitfn(PCIDevice *pci_dev); void pci_bridge_dev_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); +void pci_bridge_dev_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); void pci_bridge_dev_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); diff --git a/include/hw/pci/shpc.h b/include/hw/pci/shpc.h index 71293aca58..18f6ec1cd5 100644 --- a/include/hw/pci/shpc.h +++ b/include/hw/pci/shpc.h @@ -47,6 +47,8 @@ void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len); void shpc_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); +void shpc_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp); void shpc_device_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); From 27c1da512994b34912dbb8d12982045da1450e65 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 12 Dec 2018 10:16:23 +0100 Subject: [PATCH 40/44] spapr_pci: perform unplug via the hotplug handler Introduce and use the "unplug" callback. This is a preparation for multi-stage hotplug handlers, whereby the bus hotplug handler is overwritten by the machine hotplug handler. This handler will then pass control to the bus hotplug handler. So to get this running cleanly, we also have to make sure to go via the hotplug handler chain when actually unplugging a device after an unplug request. Lookup the hotplug handler and call "unplug". Reviewed-by: Greg Kurz Reviewed-by: Igor Mammedov Acked-by: David Gibson Signed-off-by: David Hildenbrand Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/ppc/spapr_pci.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 2374d55fc1..bfb02ee96b 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -1370,18 +1370,9 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev, /* Callback to be called during DRC release. */ void spapr_phb_remove_pci_device_cb(DeviceState *dev) { - /* some version guests do not wait for completion of a device - * cleanup (generally done asynchronously by the kernel) before - * signaling to QEMU that the device is safe, but instead sleep - * for some 'safe' period of time. unfortunately on a busy host - * this sleep isn't guaranteed to be long enough, resulting in - * bad things like IRQ lines being left asserted during final - * device removal. to deal with this we call reset just prior - * to finalizing the device, which will put the device back into - * an 'idle' state, as the device cleanup code expects. - */ - pci_device_reset(PCI_DEVICE(dev)); - object_unparent(OBJECT(dev)); + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); } static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb, @@ -1490,6 +1481,23 @@ out: } } +static void spapr_pci_unplug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + /* some version guests do not wait for completion of a device + * cleanup (generally done asynchronously by the kernel) before + * signaling to QEMU that the device is safe, but instead sleep + * for some 'safe' period of time. unfortunately on a busy host + * this sleep isn't guaranteed to be long enough, resulting in + * bad things like IRQ lines being left asserted during final + * device removal. to deal with this we call reset just prior + * to finalizing the device, which will put the device back into + * an 'idle' state, as the device cleanup code expects. + */ + pci_device_reset(PCI_DEVICE(plugged_dev)); + object_unparent(OBJECT(plugged_dev)); +} + static void spapr_pci_unplug_request(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp) { @@ -1965,6 +1973,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data) dc->user_creatable = true; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); hp->plug = spapr_pci_plug; + hp->unplug = spapr_pci_unplug; hp->unplug_request = spapr_pci_unplug_request; } From c2077e2ca0da75b6b97e2485a41b8168e2a387c2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 18 Jan 2016 16:06:03 -0700 Subject: [PATCH 41/44] pci: Adjust PCI config limit based on bus topology A conventional PCI bus does not support config space accesses above the standard 256 byte configuration space. PCIe-to-PCI bridges are not permitted to forward transactions if the extended register address field is non-zero and must handle it as an unsupported request (PCIe bridge spec rev 1.0, 4.1.3, 4.1.4). Therefore, we should not support extended config space if there is a conventional bus anywhere on the path to a device. Signed-off-by: Alex Williamson Reviewed-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci_host.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index 5eaa935cb5..5f5345dbac 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_bridge.h" #include "hw/pci/pci_host.h" #include "hw/pci/pci_bus.h" #include "trace.h" @@ -50,9 +51,29 @@ static inline PCIDevice *pci_dev_find_by_addr(PCIBus *bus, uint32_t addr) return pci_find_device(bus, bus_num, devfn); } +static void pci_adjust_config_limit(PCIBus *bus, uint32_t *limit) +{ + if (*limit > PCI_CONFIG_SPACE_SIZE) { + if (!pci_bus_is_express(bus)) { + *limit = PCI_CONFIG_SPACE_SIZE; + return; + } + + if (!pci_bus_is_root(bus)) { + PCIDevice *bridge = pci_bridge_get_device(bus); + pci_adjust_config_limit(pci_get_bus(bridge), limit); + } + } +} + void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, uint32_t limit, uint32_t val, uint32_t len) { + pci_adjust_config_limit(pci_get_bus(pci_dev), &limit); + if (limit <= addr) { + return; + } + assert(len <= 4); /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. @@ -71,6 +92,11 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, { uint32_t ret; + pci_adjust_config_limit(pci_get_bus(pci_dev), &limit); + if (limit <= addr) { + return ~0x0; + } + assert(len <= 4); /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. From b2fc91db84470a78f8e93f5b5f913c17188792c8 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 20 Dec 2018 13:40:35 +0800 Subject: [PATCH 42/44] q35: set split kernel irqchip as default Starting from QEMU 4.0, let's specify "split" as the default value for kernel-irqchip. So for QEMU>=4.0 we'll have: allowed=Y,required=N,split=Y for QEMU<=3.1 we'll have: allowed=Y,required=N,split=N (omitting all the "kernel_irqchip_" prefix) Note that this will let the default q35 machine type to depend on Linux version 4.4 or newer because that's where split irqchip is introduced in kernel. But it's fine since we're boosting supported Linux version for QEMU 4.0 to around Linux 4.5. For more information please refer to the discussion on AMD's RDTSCP: https://lore.kernel.org/lkml/20181210181328.GA762@zn.tnic/ Signed-off-by: Peter Xu Reviewed-by: Eduardo Habkost Acked-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/core/machine.c | 2 ++ hw/i386/pc_q35.c | 2 ++ include/hw/boards.h | 1 + 3 files changed, 5 insertions(+) diff --git a/hw/core/machine.c b/hw/core/machine.c index c51423b647..4439ea663f 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -653,8 +653,10 @@ static void machine_class_base_init(ObjectClass *oc, void *data) static void machine_initfn(Object *obj) { MachineState *ms = MACHINE(obj); + MachineClass *mc = MACHINE_GET_CLASS(obj); ms->kernel_irqchip_allowed = true; + ms->kernel_irqchip_split = mc->default_kernel_irqchip_split; ms->kvm_shadow_mem = -1; ms->dump_guest_core = true; ms->mem_merge = true; diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 8836d21485..0a3f3f18e4 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -304,6 +304,7 @@ static void pc_q35_machine_options(MachineClass *m) m->units_per_default_bus = 1; m->default_machine_opts = "firmware=bios-256k.bin"; m->default_display = "std"; + m->default_kernel_irqchip_split = true; m->no_floppy = 1; machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); @@ -323,6 +324,7 @@ DEFINE_Q35_MACHINE(v4_0, "pc-q35-4.0", NULL, static void pc_q35_3_1_machine_options(MachineClass *m) { pc_q35_4_0_machine_options(m); + m->default_kernel_irqchip_split = false; m->alias = NULL; SET_MACHINE_COMPAT(m, PC_COMPAT_3_1); } diff --git a/include/hw/boards.h b/include/hw/boards.h index f82f28468b..362384815e 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -195,6 +195,7 @@ struct MachineClass { const char *hw_version; ram_addr_t default_ram_size; const char *default_cpu_type; + bool default_kernel_irqchip_split; bool option_rom_has_mr; bool rom_file_has_mr; int minimum_page_bits; From a924b3d8df55a395891fd5ed341d0deb135d9aa6 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 20 Dec 2018 13:40:36 +0800 Subject: [PATCH 43/44] x86-iommu: switch intr_supported to OnOffAuto type Switch the intr_supported variable from a boolean to OnOffAuto type so that we can know whether the user specified it or not. With that we'll have a chance to help the user to choose more wisely where possible. Introduce x86_iommu_ir_supported() to mask these changes. No functional change at all. Signed-off-by: Peter Xu Acked-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 6 +++--- hw/i386/amd_iommu.c | 2 +- hw/i386/intel_iommu.c | 6 +++--- hw/i386/pc.c | 2 +- hw/i386/x86-iommu.c | 15 +++++++++++++-- include/hw/i386/x86-iommu.h | 4 +++- 6 files changed, 24 insertions(+), 11 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 9891b6913b..14f757fc36 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2426,7 +2426,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu); assert(iommu); - if (iommu->intr_supported) { + if (x86_iommu_ir_supported(iommu)) { dmar_flags |= 0x1; /* Flags: 0x1: INT_REMAP */ } @@ -2499,7 +2499,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker) * When interrupt remapping is supported, we add a special IVHD device * for type IO-APIC. */ - if (x86_iommu_get_default()->intr_supported) { + if (x86_iommu_ir_supported(x86_iommu_get_default())) { ivhd_table_len += 8; } /* IVHD length */ @@ -2535,7 +2535,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker) * Linux IOMMU driver checks for the special IVHD device (type IO-APIC). * See Linux kernel commit 'c2ff5cf5294bcbd7fa50f7d860e90a66db7e5059' */ - if (x86_iommu_get_default()->intr_supported) { + if (x86_iommu_ir_supported(x86_iommu_get_default())) { build_append_int_noprefix(table_data, (0x1ull << 56) | /* type IOAPIC */ (IOAPIC_SB_DEVID << 40) | /* IOAPIC devid */ diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 353a810e6b..8ad707aba0 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1233,7 +1233,7 @@ static int amdvi_int_remap_msi(AMDVIState *iommu, } /* validate that we are configure with intremap=on */ - if (!X86_IOMMU_DEVICE(iommu)->intr_supported) { + if (!x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu))) { trace_amdvi_err("Interrupt remapping is enabled in the guest but " "not in the host. Use intremap=on to enable interrupt " "remapping in amd-iommu."); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 96ef31eb7e..8b72735650 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3169,7 +3169,7 @@ static void vtd_init(IntelIOMMUState *s) vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits); vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits); - if (x86_iommu->intr_supported) { + if (x86_iommu_ir_supported(x86_iommu)) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; if (s->intr_eim == ON_OFF_AUTO_ON) { s->ecap |= VTD_ECAP_EIM; @@ -3270,14 +3270,14 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); - if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) { + if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu_ir_supported(x86_iommu)) { error_setg(errp, "eim=on cannot be selected without intremap=on"); return false; } if (s->intr_eim == ON_OFF_AUTO_AUTO) { s->intr_eim = (kvm_irqchip_in_kernel() || s->buggy_eim) - && x86_iommu->intr_supported ? + && x86_iommu_ir_supported(x86_iommu) ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) { diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 470cc5daf9..f248662e97 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1244,7 +1244,7 @@ void pc_machine_done(Notifier *notifier, void *data) if (pcms->apic_id_limit > 255 && !xen_enabled()) { IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); - if (!iommu || !iommu->x86_iommu.intr_supported || + if (!iommu || !x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu)) || iommu->intr_eim != ON_OFF_AUTO_ON) { error_report("current -smp configuration requires " "Extended Interrupt Mode enabled. " diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index abc3c03158..61ee0f1eaa 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -119,8 +119,13 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) return; } + /* If the user didn't specify IR, choose a default value for it */ + if (x86_iommu->intr_supported == ON_OFF_AUTO_AUTO) { + x86_iommu->intr_supported = ON_OFF_AUTO_OFF; + } + /* Both Intel and AMD IOMMU IR only support "kernel-irqchip={off|split}" */ - if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() && + if (x86_iommu_ir_supported(x86_iommu) && kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) { error_setg(errp, "Interrupt Remapping cannot work with " "kernel-irqchip=on, please use 'split|off'."); @@ -135,7 +140,8 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) } static Property x86_iommu_properties[] = { - DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false), + DEFINE_PROP_ON_OFF_AUTO("intremap", X86IOMMUState, + intr_supported, ON_OFF_AUTO_AUTO), DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false), DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true), DEFINE_PROP_END_OF_LIST(), @@ -148,6 +154,11 @@ static void x86_iommu_class_init(ObjectClass *klass, void *data) dc->props = x86_iommu_properties; } +bool x86_iommu_ir_supported(X86IOMMUState *s) +{ + return s->intr_supported == ON_OFF_AUTO_ON; +} + static const TypeInfo x86_iommu_info = { .name = TYPE_X86_IOMMU_DEVICE, .parent = TYPE_SYS_BUS_DEVICE, diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index 2b22a579a3..dcd9719a2c 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -74,13 +74,15 @@ typedef struct IEC_Notifier IEC_Notifier; struct X86IOMMUState { SysBusDevice busdev; - bool intr_supported; /* Whether vIOMMU supports IR */ + OnOffAuto intr_supported; /* Whether vIOMMU supports IR */ bool dt_supported; /* Whether vIOMMU supports DT */ bool pt_supported; /* Whether vIOMMU supports pass-through */ IommuType type; /* IOMMU type - AMD/Intel */ QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */ }; +bool x86_iommu_ir_supported(X86IOMMUState *s); + /* Generic IRQ entry information when interrupt remapping is enabled */ struct X86IOMMUIrq { /* Used by both IOAPIC/MSI interrupt remapping */ From 47748bbba24d4f4680b77da3dc5b4da531cd17d4 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 20 Dec 2018 13:40:37 +0800 Subject: [PATCH 44/44] x86-iommu: turn on IR by default if proper When the user didn't specify "intremap" for the IOMMU device, we turn it on by default if it is supported. This will turn IR on for the default Q35 platform as long as the IOMMU device is specified on new kernels. Signed-off-by: Peter Xu Acked-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/x86-iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index 61ee0f1eaa..d1534c1ae0 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -112,6 +112,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) PCMachineState *pcms = PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); QLIST_INIT(&x86_iommu->iec_notifiers); + bool irq_all_kernel = kvm_irqchip_in_kernel() && !kvm_irqchip_is_split(); if (!pcms || !pcms->bus) { error_setg(errp, "Machine-type '%s' not supported by IOMMU", @@ -121,12 +122,12 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) /* If the user didn't specify IR, choose a default value for it */ if (x86_iommu->intr_supported == ON_OFF_AUTO_AUTO) { - x86_iommu->intr_supported = ON_OFF_AUTO_OFF; + x86_iommu->intr_supported = irq_all_kernel ? + ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON; } /* Both Intel and AMD IOMMU IR only support "kernel-irqchip={off|split}" */ - if (x86_iommu_ir_supported(x86_iommu) && kvm_irqchip_in_kernel() && - !kvm_irqchip_is_split()) { + if (x86_iommu_ir_supported(x86_iommu) && irq_all_kernel) { error_setg(errp, "Interrupt Remapping cannot work with " "kernel-irqchip=on, please use 'split|off'."); return;