virtio,pci infrastructure

This includes infrastructure patches that don't do much by themselves
 but should help vfio and q35 make progress.
 Also included is rework of virtio-net to use iovec APIs
 for vector access - helpful to make it more secure
 and in preparation for a new feature that will allow
 arbitrary s/g layout for guests.
 Also included is a pci bridge bugfix by Avi.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQEcBAABAgAGBQJQjrGtAAoJECgfDbjSjVRpoRUH/iW2wYfnPtgjIF2gSBmdlmLK
 m5uLSuayHdiNXfjXeNJRySQDu4HZU3K9fDnU+HIMUUmvx/jgaMIbSQZ1gayqQhf8
 T1WH21wkkSTEnW0y9t+C3NtRzdK6AbQ1Jy/TqgP++tjT0w54QkoGKEnX3mvvNoDv
 xdT+dYcpzMFLZ2KaimP73EwILMMR45iRTPhrjSN2wlwCPPnNcg1Jt0Atb6pYveja
 1/46ZY+VxNgRtVSNhnV8xeyyJgwPOrUReKlrqD67YQAeMbzqrel9XDBJuZ3kScWt
 vv2AQ0Fe8GMkMkbNaQ0mFh6pXQ2arTAYb8sb6PColCLOn5yNqFz95YGebG36gxk=
 =HP23
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'mst/tags/for_anthony' into staging

virtio,pci infrastructure

This includes infrastructure patches that don't do much by themselves
but should help vfio and q35 make progress.
Also included is rework of virtio-net to use iovec APIs
for vector access - helpful to make it more secure
and in preparation for a new feature that will allow
arbitrary s/g layout for guests.
Also included is a pci bridge bugfix by Avi.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

* mst/tags/for_anthony: (25 commits)
  pci: avoid destroying bridge address space windows in a transaction
  virtio-net: enable mrg buf header in tap on linux
  virtio-net: test peer header support at init time
  virtio-net: minor code simplification
  virtio-net: simplify rx code
  virtio-net: switch tx to safe iov functions
  virtio-net: first s/g is always at start of buf
  virtio-net: refactor receive_hdr
  virtio-net: use safe iov operations for rx
  virtio-net: avoid sg copy
  iov: add iov_cpy
  virtio-net: track host/guest header length
  pcie: Convert PCIExpressHost to use the QOM.
  pcie: pass pcie window size to pcie_host_mmcfg_update()
  pci: Add class 0xc05 as 'SMBus'
  pci: introduce pci_swizzle_map_irq_fn() for standardized interrupt pin swizzle
  pci_ids: add intel 82801BA pci-to-pci bridge id
  pci: pci capability must be in PCI space
  pci: make each capability DWORD aligned
  qemu: enable PV EOI for qemu 1.3
  ...

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
Anthony Liguori 2012-10-29 14:31:47 -05:00
commit 233926fafa
15 changed files with 266 additions and 146 deletions

View File

@ -882,8 +882,7 @@ static int assign_intx(AssignedDevice *dev)
intx_route = pci_device_route_intx_to_irq(&dev->dev, dev->intpin);
assert(intx_route.mode != PCI_INTX_INVERTED);
if (dev->intx_route.mode == intx_route.mode &&
dev->intx_route.irq == intx_route.irq) {
if (!pci_intx_route_changed(&dev->intx_route, &intx_route)) {
return 0;
}
@ -997,12 +996,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
}
if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
uint8_t *pos = pci_dev->config + pci_dev->msi_cap;
MSIMessage msg;
MSIMessage msg = msi_get_message(pci_dev, 0);
int virq;
msg.address = pci_get_long(pos + PCI_MSI_ADDRESS_LO);
msg.data = pci_get_word(pos + PCI_MSI_DATA_32);
virq = kvm_irqchip_add_msi_route(kvm_state, msg);
if (virq < 0) {
perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");

View File

@ -122,6 +122,31 @@ void msi_set_message(PCIDevice *dev, MSIMessage msg)
pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
}
MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
{
uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
unsigned int nr_vectors = msi_nr_vectors(flags);
MSIMessage msg;
assert(vector < nr_vectors);
if (msi64bit) {
msg.address = pci_get_quad(dev->config + msi_address_lo_off(dev));
} else {
msg.address = pci_get_long(dev->config + msi_address_lo_off(dev));
}
/* upper bit 31:16 is zero */
msg.data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
if (nr_vectors > 1) {
msg.data &= ~(nr_vectors - 1);
msg.data |= vector;
}
return msg;
}
bool msi_enabled(const PCIDevice *dev)
{
return msi_present(dev) &&
@ -249,8 +274,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
unsigned int nr_vectors = msi_nr_vectors(flags);
uint64_t address;
uint32_t data;
MSIMessage msg;
assert(vector < nr_vectors);
if (msi_is_masked(dev, vector)) {
@ -261,24 +285,13 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
return;
}
if (msi64bit) {
address = pci_get_quad(dev->config + msi_address_lo_off(dev));
} else {
address = pci_get_long(dev->config + msi_address_lo_off(dev));
}
/* upper bit 31:16 is zero */
data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
if (nr_vectors > 1) {
data &= ~(nr_vectors - 1);
data |= vector;
}
msg = msi_get_message(dev, vector);
MSI_DEV_PRINTF(dev,
"notify vector 0x%x"
" address: 0x%"PRIx64" data: 0x%"PRIx32"\n",
vector, address, data);
stl_le_phys(address, data);
vector, msg.address, msg.data);
stl_le_phys(msg.address, msg.data);
}
/* Normally called by pci_default_write_config(). */

View File

@ -32,6 +32,7 @@ struct MSIMessage {
extern bool msi_supported;
void msi_set_message(PCIDevice *dev, MSIMessage msg);
MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector);
bool msi_enabled(const PCIDevice *dev);
int msi_init(struct PCIDevice *dev, uint8_t offset,
unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);

View File

@ -43,6 +43,7 @@
#include "xen.h"
#include "memory.h"
#include "exec-memory.h"
#include "cpu.h"
#ifdef CONFIG_XEN
# include <xen/hvm/hvm_info_table.h>
#endif
@ -302,6 +303,12 @@ static void pc_init_pci(QEMUMachineInitArgs *args)
initrd_filename, cpu_model, 1, 1);
}
static void pc_init_pci_1_3(QEMUMachineInitArgs *args)
{
enable_kvm_pv_eoi();
pc_init_pci(args);
}
static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args)
{
ram_addr_t ram_size = args->ram_size;
@ -349,7 +356,7 @@ static QEMUMachine pc_machine_v1_3 = {
.name = "pc-1.3",
.alias = "pc",
.desc = "Standard PC",
.init = pc_init_pci,
.init = pc_init_pci_1_3,
.max_cpus = 255,
.is_default = 1,
};

View File

@ -1117,10 +1117,21 @@ PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
pin = bus->map_irq(dev, pin);
dev = bus->parent_dev;
} while (dev);
assert(bus->route_intx_to_irq);
if (!bus->route_intx_to_irq) {
error_report("PCI: Bug - unimplemented PCI INTx routing (%s)\n",
object_get_typename(OBJECT(bus->qbus.parent)));
return (PCIINTxRoute) { PCI_INTX_DISABLED, -1 };
}
return bus->route_intx_to_irq(bus->irq_opaque, pin);
}
bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new)
{
return old->mode != new->mode || old->irq != new->irq;
}
void pci_bus_fire_intx_routing_notifier(PCIBus *bus)
{
PCIDevice *dev;
@ -1144,6 +1155,24 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
dev->intx_routing_notifier = notifier;
}
/*
* PCI-to-PCI bridge specification
* 9.1: Interrupt routing. Table 9-1
*
* the PCI Express Base Specification, Revision 2.1
* 2.2.8.1: INTx interrutp signaling - Rules
* the Implementation Note
* Table 2-20
*/
/*
* 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD
* 0-origin unlike PCI interrupt pin register.
*/
int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin)
{
return (pin + PCI_SLOT(pci_dev->devfn)) % PCI_NUM_PINS;
}
/***********************************************************/
/* monitor info on PCI */
@ -1208,6 +1237,7 @@ static const pci_class_desc pci_class_descriptions[] =
{ 0x0c02, "SSA controller", "ssa"},
{ 0x0c03, "USB controller", "usb"},
{ 0x0c04, "Fibre channel controller", "fibre-channel"},
{ 0x0c05, "SMBus"},
{ 0, NULL}
};
@ -1667,16 +1697,16 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
return pci_create_simple_multifunction(bus, devfn, false, name);
}
static int pci_find_space(PCIDevice *pdev, uint8_t size)
static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size)
{
int config_size = pci_config_size(pdev);
int offset = PCI_CONFIG_HEADER_SIZE;
int i;
for (i = PCI_CONFIG_HEADER_SIZE; i < config_size; ++i)
for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i) {
if (pdev->used[i])
offset = i + 1;
else if (i - offset + 1 == size)
return offset;
}
return 0;
}
@ -1895,7 +1925,7 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
pdev->config[PCI_CAPABILITY_LIST] = offset;
pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
memset(pdev->used + offset, 0xFF, size);
memset(pdev->used + offset, 0xFF, QEMU_ALIGN_UP(size, 4));
/* Make capability read-only by default */
memset(pdev->wmask + offset, 0, size);
/* Check capability by default */
@ -1915,7 +1945,7 @@ void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
memset(pdev->w1cmask + offset, 0, size);
/* Clear cmask as device-specific registers can't be checked */
memset(pdev->cmask + offset, 0, size);
memset(pdev->used + offset, 0, size);
memset(pdev->used + offset, 0, QEMU_ALIGN_UP(size, 4));
if (!pdev->config[PCI_CAPABILITY_LIST])
pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;

View File

@ -318,6 +318,8 @@ void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
void *irq_opaque, int nirq);
int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
void pci_bus_hotplug(PCIBus *bus, pci_hotplug_fn hotplug, DeviceState *dev);
/* 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD */
int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin);
PCIBus *pci_register_bus(DeviceState *parent, const char *name,
pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
void *irq_opaque,
@ -326,6 +328,7 @@ PCIBus *pci_register_bus(DeviceState *parent, const char *name,
uint8_t devfn_min, int nirq);
void pci_bus_set_route_irq_fn(PCIBus *, pci_route_irq_fn);
PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin);
bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
void pci_device_set_intx_routing_notifier(PCIDevice *dev,
PCIINTxRoutingNotifier notifier);

View File

@ -31,6 +31,7 @@
#define PCI_CLASS_SYSTEM_OTHER 0x0880
#define PCI_CLASS_SERIAL_USB 0x0c03
#define PCI_CLASS_SERIAL_SMBUS 0x0c05
#define PCI_CLASS_BRIDGE_HOST 0x0600
#define PCI_CLASS_BRIDGE_ISA 0x0601
@ -105,6 +106,7 @@
#define PCI_DEVICE_ID_INTEL_82378 0x0484
#define PCI_DEVICE_ID_INTEL_82441 0x1237
#define PCI_DEVICE_ID_INTEL_82801AA_5 0x2415
#define PCI_DEVICE_ID_INTEL_82801BA_11 0x244e
#define PCI_DEVICE_ID_INTEL_82801D 0x24CD
#define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab
#define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000

View File

@ -107,14 +107,9 @@ static const MemoryRegionOps pcie_mmcfg_ops = {
/* pcie_host::base_addr == PCIE_BASE_ADDR_UNMAPPED when it isn't mapped. */
#define PCIE_BASE_ADDR_UNMAPPED ((hwaddr)-1ULL)
int pcie_host_init(PCIExpressHost *e, uint32_t size)
int pcie_host_init(PCIExpressHost *e)
{
assert(!(size & (size - 1))); /* power of 2 */
assert(size >= PCIE_MMCFG_SIZE_MIN);
assert(size <= PCIE_MMCFG_SIZE_MAX);
e->base_addr = PCIE_BASE_ADDR_UNMAPPED;
e->size = size;
memory_region_init_io(&e->mmio, &pcie_mmcfg_ops, e, "pcie-mmcfg", e->size);
return 0;
}
@ -123,22 +118,44 @@ void pcie_host_mmcfg_unmap(PCIExpressHost *e)
{
if (e->base_addr != PCIE_BASE_ADDR_UNMAPPED) {
memory_region_del_subregion(get_system_memory(), &e->mmio);
memory_region_destroy(&e->mmio);
e->base_addr = PCIE_BASE_ADDR_UNMAPPED;
}
}
void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr)
void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr,
uint32_t size)
{
assert(!(size & (size - 1))); /* power of 2 */
assert(size >= PCIE_MMCFG_SIZE_MIN);
assert(size <= PCIE_MMCFG_SIZE_MAX);
e->size = size;
memory_region_init_io(&e->mmio, &pcie_mmcfg_ops, e, "pcie-mmcfg", e->size);
e->base_addr = addr;
memory_region_add_subregion(get_system_memory(), e->base_addr, &e->mmio);
}
void pcie_host_mmcfg_update(PCIExpressHost *e,
int enable,
hwaddr addr)
hwaddr addr,
uint32_t size)
{
pcie_host_mmcfg_unmap(e);
if (enable) {
pcie_host_mmcfg_map(e, addr);
pcie_host_mmcfg_map(e, addr, size);
}
}
static const TypeInfo pcie_host_type_info = {
.name = TYPE_PCIE_HOST_BRIDGE,
.parent = TYPE_PCI_HOST_BRIDGE,
.abstract = true,
.instance_size = sizeof(PCIExpressHost),
};
static void pcie_host_register_types(void)
{
type_register_static(&pcie_host_type_info);
}
type_init(pcie_host_register_types)

View File

@ -24,6 +24,10 @@
#include "pci_host.h"
#include "memory.h"
#define TYPE_PCIE_HOST_BRIDGE "pcie-host-bridge"
#define PCIE_HOST_BRIDGE(obj) \
OBJECT_CHECK(PCIExpressHost, (obj), TYPE_PCIE_HOST_BRIDGE)
struct PCIExpressHost {
PCIHostState pci;
@ -39,11 +43,12 @@ struct PCIExpressHost {
MemoryRegion mmio;
};
int pcie_host_init(PCIExpressHost *e, uint32_t size);
int pcie_host_init(PCIExpressHost *e);
void pcie_host_mmcfg_unmap(PCIExpressHost *e);
void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr);
void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr, uint32_t size);
void pcie_host_mmcfg_update(PCIExpressHost *e,
int enable,
hwaddr addr);
hwaddr addr,
uint32_t size);
#endif /* PCIE_HOST_H */

View File

@ -150,10 +150,6 @@ int vhost_net_start(struct vhost_net *net,
if (r < 0) {
goto fail_notifiers;
}
if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
tap_set_vnet_hdr_len(net->nc,
sizeof(struct virtio_net_hdr_mrg_rxbuf));
}
r = vhost_dev_start(&net->dev, dev);
if (r < 0) {
@ -179,9 +175,6 @@ fail:
}
net->nc->info->poll(net->nc, true);
vhost_dev_stop(&net->dev, dev);
if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
tap_set_vnet_hdr_len(net->nc, sizeof(struct virtio_net_hdr));
}
fail_start:
vhost_dev_disable_notifiers(&net->dev, dev);
fail_notifiers:
@ -199,18 +192,12 @@ void vhost_net_stop(struct vhost_net *net,
}
net->nc->info->poll(net->nc, true);
vhost_dev_stop(&net->dev, dev);
if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
tap_set_vnet_hdr_len(net->nc, sizeof(struct virtio_net_hdr));
}
vhost_dev_disable_notifiers(&net->dev, dev);
}
void vhost_net_cleanup(struct vhost_net *net)
{
vhost_dev_cleanup(&net->dev);
if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
tap_set_vnet_hdr_len(net->nc, sizeof(struct virtio_net_hdr));
}
g_free(net);
}
#else

View File

@ -41,6 +41,8 @@ typedef struct VirtIONet
int32_t tx_burst;
int tx_waiting;
uint32_t has_vnet_hdr;
size_t host_hdr_len;
size_t guest_hdr_len;
uint8_t has_ufo;
struct {
VirtQueueElement elem;
@ -200,16 +202,19 @@ static void virtio_net_reset(VirtIODevice *vdev)
memset(n->vlans, 0, MAX_VLAN >> 3);
}
static int peer_has_vnet_hdr(VirtIONet *n)
static void peer_test_vnet_hdr(VirtIONet *n)
{
if (!n->nic->nc.peer)
return 0;
return;
if (n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP)
return 0;
return;
n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);
}
static int peer_has_vnet_hdr(VirtIONet *n)
{
return n->has_vnet_hdr;
}
@ -223,15 +228,27 @@ static int peer_has_ufo(VirtIONet *n)
return n->has_ufo;
}
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
{
n->mergeable_rx_bufs = mergeable_rx_bufs;
n->guest_hdr_len = n->mergeable_rx_bufs ?
sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
if (peer_has_vnet_hdr(n) &&
tap_has_vnet_hdr_len(n->nic->nc.peer, n->guest_hdr_len)) {
tap_set_vnet_hdr_len(n->nic->nc.peer, n->guest_hdr_len);
n->host_hdr_len = n->guest_hdr_len;
}
}
static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
VirtIONet *n = to_virtio_net(vdev);
features |= (1 << VIRTIO_NET_F_MAC);
if (peer_has_vnet_hdr(n)) {
tap_using_vnet_hdr(n->nic->nc.peer, 1);
} else {
if (!peer_has_vnet_hdr(n)) {
features &= ~(0x1 << VIRTIO_NET_F_CSUM);
features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO4);
features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO6);
@ -277,7 +294,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
VirtIONet *n = to_virtio_net(vdev);
n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));
virtio_net_set_mrg_rx_bufs(n, !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)));
if (n->has_vnet_hdr) {
tap_set_offload(n->nic->nc.peer,
@ -499,41 +516,34 @@ static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
* cache.
*/
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
const uint8_t *buf, size_t size)
uint8_t *buf, size_t size)
{
if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
(size > 27 && size < 1500) && /* normal sized MTU */
(buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
(buf[23] == 17) && /* ip.protocol == UDP */
(buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
/* FIXME this cast is evil */
net_checksum_calculate((uint8_t *)buf, size);
net_checksum_calculate(buf, size);
hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
}
}
static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
const void *buf, size_t size, size_t hdr_len)
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
const void *buf, size_t size)
{
struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)iov[0].iov_base;
int offset = 0;
hdr->flags = 0;
hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
if (n->has_vnet_hdr) {
memcpy(hdr, buf, sizeof(*hdr));
offset = sizeof(*hdr);
work_around_broken_dhclient(hdr, buf + offset, size - offset);
/* FIXME this cast is evil */
void *wbuf = (void *)buf;
work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
size - n->host_hdr_len);
iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
} else {
struct virtio_net_hdr hdr = {
.flags = 0,
.gso_type = VIRTIO_NET_HDR_GSO_NONE
};
iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
}
/* We only ever receive a struct virtio_net_hdr from the tapfd,
* but we may be passing along a larger header to the guest.
*/
iov[0].iov_base += hdr_len;
iov[0].iov_len -= hdr_len;
return offset;
}
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
@ -546,9 +556,7 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
if (n->promisc)
return 1;
if (n->has_vnet_hdr) {
ptr += sizeof(struct virtio_net_hdr);
}
ptr += n->host_hdr_len;
if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;
@ -592,19 +600,16 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
size_t guest_hdr_len, offset, i, host_hdr_len;
struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
struct virtio_net_hdr_mrg_rxbuf mhdr;
unsigned mhdr_cnt = 0;
size_t offset, i, guest_offset;
if (!virtio_net_can_receive(&n->nic->nc))
return -1;
/* hdr_len refers to the header we supply to the guest */
guest_hdr_len = n->mergeable_rx_bufs ?
sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
host_hdr_len = n->has_vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
if (!virtio_net_has_buffers(n, size + guest_hdr_len - host_hdr_len))
if (!virtio_net_has_buffers(n, size + n->guest_hdr_len - n->host_hdr_len))
return 0;
if (!receive_filter(n, buf, size))
@ -615,7 +620,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
while (offset < size) {
VirtQueueElement elem;
int len, total;
struct iovec sg[VIRTQUEUE_MAX_SIZE];
const struct iovec *sg = elem.in_sg;
total = 0;
@ -626,7 +631,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
"i %zd mergeable %d offset %zd, size %zd, "
"guest hdr len %zd, host hdr len %zd guest features 0x%x",
i, n->mergeable_rx_bufs, offset, size,
guest_hdr_len, host_hdr_len, n->vdev.guest_features);
n->guest_hdr_len, n->host_hdr_len, n->vdev.guest_features);
exit(1);
}
@ -635,24 +640,25 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
exit(1);
}
if (!n->mergeable_rx_bufs && elem.in_sg[0].iov_len != guest_hdr_len) {
error_report("virtio-net header not in first element");
exit(1);
}
memcpy(&sg, &elem.in_sg[0], sizeof(sg[0]) * elem.in_num);
if (i == 0) {
if (n->mergeable_rx_bufs)
mhdr = (struct virtio_net_hdr_mrg_rxbuf *)sg[0].iov_base;
assert(offset == 0);
if (n->mergeable_rx_bufs) {
mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
sg, elem.in_num,
offsetof(typeof(mhdr), num_buffers),
sizeof(mhdr.num_buffers));
}
offset += receive_header(n, sg, elem.in_num,
buf + offset, size - offset, guest_hdr_len);
total += guest_hdr_len;
receive_header(n, sg, elem.in_num, buf, size);
offset = n->host_hdr_len;
total += n->guest_hdr_len;
guest_offset = n->guest_hdr_len;
} else {
guest_offset = 0;
}
/* copy in packet. ugh */
len = iov_from_buf(sg, elem.in_num, 0,
len = iov_from_buf(sg, elem.in_num, guest_offset,
buf + offset, size - offset);
total += len;
offset += len;
@ -665,7 +671,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
"i %zd mergeable %d offset %zd, size %zd, "
"guest hdr len %zd, host hdr len %zd",
i, n->mergeable_rx_bufs,
offset, size, guest_hdr_len, host_hdr_len);
offset, size, n->guest_hdr_len, n->host_hdr_len);
#endif
return size;
}
@ -674,8 +680,11 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
virtqueue_fill(n->rx_vq, &elem, total, i++);
}
if (mhdr) {
stw_p(&mhdr->num_buffers, i);
if (mhdr_cnt) {
stw_p(&mhdr.num_buffers, i);
iov_from_buf(mhdr_sg, mhdr_cnt,
0,
&mhdr.num_buffers, sizeof mhdr.num_buffers);
}
virtqueue_flush(n->rx_vq, i);
@ -716,33 +725,35 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
}
while (virtqueue_pop(vq, &elem)) {
ssize_t ret, len = 0;
ssize_t ret, len;
unsigned int out_num = elem.out_num;
struct iovec *out_sg = &elem.out_sg[0];
unsigned hdr_len;
struct iovec sg[VIRTQUEUE_MAX_SIZE];
/* hdr_len refers to the header received from the guest */
hdr_len = n->mergeable_rx_bufs ?
sizeof(struct virtio_net_hdr_mrg_rxbuf) :
sizeof(struct virtio_net_hdr);
if (out_num < 1 || out_sg->iov_len != hdr_len) {
if (out_num < 1) {
error_report("virtio-net header not in first element");
exit(1);
}
/* ignore the header if GSO is not supported */
if (!n->has_vnet_hdr) {
out_num--;
out_sg++;
len += hdr_len;
} else if (n->mergeable_rx_bufs) {
/* tapfd expects a struct virtio_net_hdr */
hdr_len -= sizeof(struct virtio_net_hdr);
out_sg->iov_len -= hdr_len;
len += hdr_len;
/*
* If host wants to see the guest header as is, we can
* pass it on unchanged. Otherwise, copy just the parts
* that host is interested in.
*/
assert(n->host_hdr_len <= n->guest_hdr_len);
if (n->host_hdr_len != n->guest_hdr_len) {
unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
out_sg, out_num,
0, n->host_hdr_len);
sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
out_sg, out_num,
n->guest_hdr_len, -1);
out_num = sg_num;
out_sg = sg;
}
len = n->guest_hdr_len;
ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
virtio_net_tx_complete);
if (ret == 0) {
@ -899,7 +910,8 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
qemu_get_buffer(f, n->mac, ETH_ALEN);
n->tx_waiting = qemu_get_be32(f);
n->mergeable_rx_bufs = qemu_get_be32(f);
virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));
if (version_id >= 3)
n->status = qemu_get_be16(f);
@ -939,7 +951,6 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
}
if (n->has_vnet_hdr) {
tap_using_vnet_hdr(n->nic->nc.peer, 1);
tap_set_offload(n->nic->nc.peer,
(n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
(n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
@ -1038,12 +1049,19 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
n->status = VIRTIO_NET_S_LINK_UP;
n->nic = qemu_new_nic(&net_virtio_info, conf, object_get_typename(OBJECT(dev)), dev->id, n);
peer_test_vnet_hdr(n);
if (peer_has_vnet_hdr(n)) {
tap_using_vnet_hdr(n->nic->nc.peer, 1);
n->host_hdr_len = sizeof(struct virtio_net_hdr);
} else {
n->host_hdr_len = 0;
}
qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
n->tx_waiting = 0;
n->tx_burst = net->txburst;
n->mergeable_rx_bufs = 0;
virtio_net_set_mrg_rx_bufs(n, 0);
n->promisc = 1; /* for compatibility */
n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

23
iov.c
View File

@ -228,3 +228,26 @@ void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
fprintf(fp, "\n");
}
}
unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
const struct iovec *iov, unsigned int iov_cnt,
size_t offset, size_t bytes)
{
size_t len;
unsigned int i, j;
for (i = 0, j = 0; i < iov_cnt && j < dst_iov_cnt && bytes; i++) {
if (offset >= iov[i].iov_len) {
offset -= iov[i].iov_len;
continue;
}
len = MIN(bytes, iov[i].iov_len - offset);
dst_iov[j].iov_base = iov[i].iov_base + offset;
dst_iov[j].iov_len = len;
j++;
bytes -= len;
offset = 0;
}
assert(offset == 0);
return j;
}

9
iov.h
View File

@ -86,3 +86,12 @@ ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
*/
void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
FILE *fp, const char *prefix, size_t limit);
/*
* Partial copy of vector from iov to dst_iov (data is not copied).
* dst_iov overlaps iov at a specified offset.
* size of dst_iov is at most bytes. dst vector count is returned.
*/
unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
const struct iovec *iov, unsigned int iov_cnt,
size_t offset, size_t bytes);

View File

@ -125,6 +125,25 @@ typedef struct model_features_t {
int check_cpuid = 0;
int enforce_cpuid = 0;
#if defined(CONFIG_KVM)
static uint32_t kvm_default_features = (1 << KVM_FEATURE_CLOCKSOURCE) |
(1 << KVM_FEATURE_NOP_IO_DELAY) |
(1 << KVM_FEATURE_MMU_OP) |
(1 << KVM_FEATURE_CLOCKSOURCE2) |
(1 << KVM_FEATURE_ASYNC_PF) |
(1 << KVM_FEATURE_STEAL_TIME) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
static const uint32_t kvm_pv_eoi_features = (0x1 << KVM_FEATURE_PV_EOI);
#else
static uint32_t kvm_default_features = 0;
static const uint32_t kvm_pv_eoi_features = 0;
#endif
void enable_kvm_pv_eoi(void)
{
kvm_default_features |= kvm_pv_eoi_features;
}
void host_cpuid(uint32_t function, uint32_t count,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
@ -1108,7 +1127,7 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
/* Features to be added*/
uint32_t plus_features = 0, plus_ext_features = 0;
uint32_t plus_ext2_features = 0, plus_ext3_features = 0;
uint32_t plus_kvm_features = 0, plus_svm_features = 0;
uint32_t plus_kvm_features = kvm_default_features, plus_svm_features = 0;
uint32_t plus_7_0_ebx_features = 0;
/* Features to be removed */
uint32_t minus_features = 0, minus_ext_features = 0;
@ -1128,18 +1147,6 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
memcpy(x86_cpu_def, def, sizeof(*def));
}
#if defined(CONFIG_KVM)
plus_kvm_features = (1 << KVM_FEATURE_CLOCKSOURCE) |
(1 << KVM_FEATURE_NOP_IO_DELAY) |
(1 << KVM_FEATURE_MMU_OP) |
(1 << KVM_FEATURE_CLOCKSOURCE2) |
(1 << KVM_FEATURE_ASYNC_PF) |
(1 << KVM_FEATURE_STEAL_TIME) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
#else
plus_kvm_features = 0;
#endif
add_flagname_to_bitmaps("hypervisor", &plus_features,
&plus_ext_features, &plus_ext2_features, &plus_ext3_features,
&plus_kvm_features, &plus_svm_features, &plus_7_0_ebx_features);

View File

@ -1188,4 +1188,6 @@ void do_smm_enter(CPUX86State *env1);
void cpu_report_tpr_access(CPUX86State *env, TPRAccess access);
void enable_kvm_pv_eoi(void);
#endif /* CPU_I386_H */