diff --git a/hw/e1000.c b/hw/e1000.c index 80b78bc618..7d7d14002f 100644 --- a/hw/e1000.c +++ b/hw/e1000.c @@ -345,7 +345,7 @@ is_vlan_txd(uint32_t txd_lower) /* FCS aka Ethernet CRC-32. We don't get it from backends and can't * fill it in, just pad descriptor length by 4 bytes unless guest - * told us to trip it off the packet. */ + * told us to strip it off the packet. */ static inline int fcs_len(E1000State *s) { @@ -690,9 +690,14 @@ e1000_receive(VLANClientState *nc, const uint8_t *buf, size_t size) s->mac_reg[GPRC]++; s->mac_reg[TPR]++; - n = s->mac_reg[TORL]; - if ((s->mac_reg[TORL] += size) < n) + /* TOR - Total Octets Received: + * This register includes bytes received in a packet from the field through the field, inclusively. + */ + n = s->mac_reg[TORL] + size + /* Always include FCS length. */ 4; + if (n < s->mac_reg[TORL]) s->mac_reg[TORH]++; + s->mac_reg[TORL] = n; n = E1000_ICS_RXT0; if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH]) diff --git a/hw/s390-virtio-bus.c b/hw/s390-virtio-bus.c index fe6884d47d..784dc01b97 100644 --- a/hw/s390-virtio-bus.c +++ b/hw/s390-virtio-bus.c @@ -27,6 +27,7 @@ #include "elf.h" #include "hw/virtio.h" #include "hw/virtio-serial.h" +#include "hw/virtio-net.h" #include "hw/sysbus.h" #include "kvm.h" @@ -110,7 +111,7 @@ static int s390_virtio_net_init(VirtIOS390Device *dev) { VirtIODevice *vdev; - vdev = virtio_net_init((DeviceState *)dev, &dev->nic); + vdev = virtio_net_init((DeviceState *)dev, &dev->nic, &dev->net); if (!vdev) { return -1; } @@ -327,6 +328,11 @@ static VirtIOS390DeviceInfo s390_virtio_net = { .qdev.size = sizeof(VirtIOS390Device), .qdev.props = (Property[]) { DEFINE_NIC_PROPERTIES(VirtIOS390Device, nic), + DEFINE_PROP_UINT32("x-txtimer", VirtIOS390Device, + net.txtimer, TX_TIMER_INTERVAL), + DEFINE_PROP_INT32("x-txburst", VirtIOS390Device, + net.txburst, TX_BURST), + DEFINE_PROP_STRING("tx", VirtIOS390Device, net.tx), DEFINE_PROP_END_OF_LIST(), }, }; diff --git a/hw/s390-virtio-bus.h b/hw/s390-virtio-bus.h index 333fea8963..41558c9c67 100644 --- a/hw/s390-virtio-bus.h +++ b/hw/s390-virtio-bus.h @@ -43,6 +43,7 @@ typedef struct VirtIOS390Device { uint32_t host_features; /* Max. number of ports we can have for a the virtio-serial device */ uint32_t max_virtserial_ports; + virtio_net_conf net; } VirtIOS390Device; typedef struct VirtIOS390Bus { diff --git a/hw/syborg_virtio.c b/hw/syborg_virtio.c index abf0370107..4dfd1a87b9 100644 --- a/hw/syborg_virtio.c +++ b/hw/syborg_virtio.c @@ -68,6 +68,7 @@ typedef struct { uint32_t id; NICConf nic; uint32_t host_features; + virtio_net_conf net; } SyborgVirtIOProxy; static uint32_t syborg_virtio_readl(void *opaque, target_phys_addr_t offset) @@ -284,7 +285,7 @@ static int syborg_virtio_net_init(SysBusDevice *dev) VirtIODevice *vdev; SyborgVirtIOProxy *proxy = FROM_SYSBUS(SyborgVirtIOProxy, dev); - vdev = virtio_net_init(&dev->qdev, &proxy->nic); + vdev = virtio_net_init(&dev->qdev, &proxy->nic, &proxy->net); return syborg_virtio_init(proxy, vdev); } @@ -295,6 +296,11 @@ static SysBusDeviceInfo syborg_virtio_net_info = { .qdev.props = (Property[]) { DEFINE_NIC_PROPERTIES(SyborgVirtIOProxy, nic), DEFINE_VIRTIO_NET_FEATURES(SyborgVirtIOProxy, host_features), + DEFINE_PROP_UINT32("x-txtimer", SyborgVirtIOProxy, + net.txtimer, TX_TIMER_INTERVAL), + DEFINE_PROP_INT32("x-txburst", SyborgVirtIOProxy, + net.txburst, TX_BURST), + DEFINE_PROP_STRING("tx", SyborgVirtIOProxy, net.tx), DEFINE_PROP_END_OF_LIST(), } }; diff --git a/hw/vhost_net.c b/hw/vhost_net.c index 0c00de272a..4a7b8194f2 100644 --- a/hw/vhost_net.c +++ b/hw/vhost_net.c @@ -50,7 +50,9 @@ unsigned vhost_net_get_features(struct vhost_net *net, unsigned features) if (!(net->dev.features & (1 << VIRTIO_RING_F_INDIRECT_DESC))) { features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); } - features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF); + if (!(net->dev.features & (1 << VIRTIO_NET_F_MRG_RXBUF))) { + features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF); + } return features; } @@ -63,6 +65,9 @@ void vhost_net_ack_features(struct vhost_net *net, unsigned features) if (features & (1 << VIRTIO_RING_F_INDIRECT_DESC)) { net->dev.acked_features |= (1 << VIRTIO_RING_F_INDIRECT_DESC); } + if (features & (1 << VIRTIO_NET_F_MRG_RXBUF)) { + net->dev.acked_features |= (1 << VIRTIO_NET_F_MRG_RXBUF); + } } static int vhost_net_get_fd(VLANClientState *backend) @@ -97,6 +102,10 @@ struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd) if (r < 0) { goto fail; } + if (!tap_has_vnet_hdr_len(backend, + sizeof(struct virtio_net_hdr_mrg_rxbuf))) { + net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF); + } if (~net->dev.features & net->dev.backend_features) { fprintf(stderr, "vhost lacks feature mask %" PRIu64 " for backend\n", (uint64_t)(~net->dev.features & net->dev.backend_features)); @@ -117,6 +126,10 @@ int vhost_net_start(struct vhost_net *net, { struct vhost_vring_file file = { }; int r; + if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) { + tap_set_vnet_hdr_len(net->vc, + sizeof(struct virtio_net_hdr_mrg_rxbuf)); + } net->dev.nvqs = 2; net->dev.vqs = net->vqs; @@ -144,6 +157,9 @@ fail: } net->vc->info->poll(net->vc, true); vhost_dev_stop(&net->dev, dev); + if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) { + tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr)); + } return r; } @@ -158,11 +174,17 @@ void vhost_net_stop(struct vhost_net *net, } net->vc->info->poll(net->vc, true); vhost_dev_stop(&net->dev, dev); + if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) { + tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr)); + } } void vhost_net_cleanup(struct vhost_net *net) { vhost_dev_cleanup(&net->dev); + if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) { + tap_set_vnet_hdr_len(net->vc, sizeof(struct virtio_net_hdr)); + } qemu_free(net); } #else diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 075f72df2d..0a9cae22c4 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -36,7 +36,10 @@ typedef struct VirtIONet VirtQueue *ctrl_vq; NICState *nic; QEMUTimer *tx_timer; - int tx_timer_active; + QEMUBH *tx_bh; + uint32_t tx_timeout; + int32_t tx_burst; + int tx_waiting; uint32_t has_vnet_hdr; uint8_t has_ufo; struct { @@ -619,7 +622,7 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_ return size; } -static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq); +static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq); static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len) { @@ -635,16 +638,18 @@ static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len) } /* TX */ -static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq) +static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq) { VirtQueueElement elem; + int32_t num_packets = 0; - if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) - return; + if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return num_packets; + } if (n->async_tx.elem.out_num) { virtio_queue_set_notification(n->tx_vq, 0); - return; + return num_packets; } while (virtqueue_pop(vq, &elem)) { @@ -681,38 +686,55 @@ static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq) virtio_queue_set_notification(n->tx_vq, 0); n->async_tx.elem = elem; n->async_tx.len = len; - return; + return -EBUSY; } len += ret; virtqueue_push(vq, &elem, len); virtio_notify(&n->vdev, vq); + + if (++num_packets >= n->tx_burst) { + break; + } } + return num_packets; } -static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq) +static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) { VirtIONet *n = to_virtio_net(vdev); - if (n->tx_timer_active) { + if (n->tx_waiting) { virtio_queue_set_notification(vq, 1); qemu_del_timer(n->tx_timer); - n->tx_timer_active = 0; + n->tx_waiting = 0; virtio_net_flush_tx(n, vq); } else { qemu_mod_timer(n->tx_timer, - qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL); - n->tx_timer_active = 1; + qemu_get_clock(vm_clock) + n->tx_timeout); + n->tx_waiting = 1; virtio_queue_set_notification(vq, 0); } } +static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIONet *n = to_virtio_net(vdev); + + if (unlikely(n->tx_waiting)) { + return; + } + virtio_queue_set_notification(vq, 0); + qemu_bh_schedule(n->tx_bh); + n->tx_waiting = 1; +} + static void virtio_net_tx_timer(void *opaque) { VirtIONet *n = opaque; - n->tx_timer_active = 0; + n->tx_waiting = 0; /* Just in case the driver is not ready on more */ if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) @@ -722,6 +744,41 @@ static void virtio_net_tx_timer(void *opaque) virtio_net_flush_tx(n, n->tx_vq); } +static void virtio_net_tx_bh(void *opaque) +{ + VirtIONet *n = opaque; + int32_t ret; + + n->tx_waiting = 0; + + /* Just in case the driver is not ready on more */ + if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))) + return; + + ret = virtio_net_flush_tx(n, n->tx_vq); + if (ret == -EBUSY) { + return; /* Notification re-enable handled by tx_complete */ + } + + /* If we flush a full burst of packets, assume there are + * more coming and immediately reschedule */ + if (ret >= n->tx_burst) { + qemu_bh_schedule(n->tx_bh); + n->tx_waiting = 1; + return; + } + + /* If less than a full burst, re-enable notification and flush + * anything that may have come in while we weren't looking. If + * we find something, assume the guest is still active and reschedule */ + virtio_queue_set_notification(n->tx_vq, 1); + if (virtio_net_flush_tx(n, n->tx_vq) > 0) { + virtio_queue_set_notification(n->tx_vq, 0); + qemu_bh_schedule(n->tx_bh); + n->tx_waiting = 1; + } +} + static void virtio_net_save(QEMUFile *f, void *opaque) { VirtIONet *n = opaque; @@ -735,7 +792,7 @@ static void virtio_net_save(QEMUFile *f, void *opaque) virtio_save(&n->vdev, f); qemu_put_buffer(f, n->mac, ETH_ALEN); - qemu_put_be32(f, n->tx_timer_active); + qemu_put_be32(f, n->tx_waiting); qemu_put_be32(f, n->mergeable_rx_bufs); qemu_put_be16(f, n->status); qemu_put_byte(f, n->promisc); @@ -764,7 +821,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) virtio_load(&n->vdev, f); qemu_get_buffer(f, n->mac, ETH_ALEN); - n->tx_timer_active = qemu_get_be32(f); + n->tx_waiting = qemu_get_be32(f); n->mergeable_rx_bufs = qemu_get_be32(f); if (version_id >= 3) @@ -840,9 +897,13 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) } n->mac_table.first_multi = i; - if (n->tx_timer_active) { - qemu_mod_timer(n->tx_timer, - qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL); + if (n->tx_waiting) { + if (n->tx_timer) { + qemu_mod_timer(n->tx_timer, + qemu_get_clock(vm_clock) + n->tx_timeout); + } else { + qemu_bh_schedule(n->tx_bh); + } } return 0; } @@ -903,7 +964,8 @@ static void virtio_net_vmstate_change(void *opaque, int running, int reason) virtio_net_set_status(&n->vdev, n->vdev.status & status); } -VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf) +VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf, + virtio_net_conf *net) { VirtIONet *n; @@ -919,7 +981,22 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf) n->vdev.reset = virtio_net_reset; n->vdev.set_status = virtio_net_set_status; n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx); - n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx); + + if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) { + fprintf(stderr, "virtio-net: " + "Unknown option tx=%s, valid options: \"timer\" \"bh\"\n", + net->tx); + fprintf(stderr, "Defaulting to \"bh\"\n"); + } + + if (net->tx && !strcmp(net->tx, "timer")) { + n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_timer); + n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n); + n->tx_timeout = net->txtimer; + } else { + n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh); + n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n); + } n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl); qemu_macaddr_default_if_unset(&conf->macaddr); memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac)); @@ -929,8 +1006,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf) qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a); - n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n); - n->tx_timer_active = 0; + n->tx_waiting = 0; + n->tx_burst = net->txburst; n->mergeable_rx_bufs = 0; n->promisc = 1; /* for compatibility */ @@ -962,8 +1039,12 @@ void virtio_net_exit(VirtIODevice *vdev) qemu_free(n->mac_table.macs); qemu_free(n->vlans); - qemu_del_timer(n->tx_timer); - qemu_free_timer(n->tx_timer); + if (n->tx_timer) { + qemu_del_timer(n->tx_timer); + qemu_free_timer(n->tx_timer); + } else { + qemu_bh_delete(n->tx_bh); + } virtio_cleanup(&n->vdev); qemu_del_vlan_client(&n->nic->nc); diff --git a/hw/virtio-net.h b/hw/virtio-net.h index 235f1a9fa8..8af9a1ce55 100644 --- a/hw/virtio-net.h +++ b/hw/virtio-net.h @@ -49,6 +49,20 @@ #define TX_TIMER_INTERVAL 150000 /* 150 us */ +/* Limit the number of packets that can be sent via a single flush + * of the TX queue. This gives us a guaranteed exit condition and + * ensures fairness in the io path. 256 conveniently matches the + * length of the TX queue and shows a good balance of performance + * and latency. */ +#define TX_BURST 256 + +typedef struct virtio_net_conf +{ + uint32_t txtimer; + int32_t txburst; + char *tx; +} virtio_net_conf; + /* Maximum packet size we can receive from tap device: header + 64k */ #define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 << 10)) diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 6e8f88a141..86e6b0a561 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -107,6 +107,7 @@ typedef struct { #endif /* Max. number of ports we can have for a the virtio-serial device */ uint32_t max_virtserial_ports; + virtio_net_conf net; } VirtIOPCIProxy; /* virtio device */ @@ -613,7 +614,7 @@ static int virtio_net_init_pci(PCIDevice *pci_dev) VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev); VirtIODevice *vdev; - vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic); + vdev = virtio_net_init(&pci_dev->qdev, &proxy->nic, &proxy->net); vdev->nvectors = proxy->nvectors; virtio_init_pci(proxy, vdev, @@ -690,6 +691,11 @@ static PCIDeviceInfo virtio_info[] = { DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3), DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features), DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic), + DEFINE_PROP_UINT32("x-txtimer", VirtIOPCIProxy, + net.txtimer, TX_TIMER_INTERVAL), + DEFINE_PROP_INT32("x-txburst", VirtIOPCIProxy, + net.txburst, TX_BURST), + DEFINE_PROP_STRING("tx", VirtIOPCIProxy, net.tx), DEFINE_PROP_END_OF_LIST(), }, .qdev.reset = virtio_pci_reset, diff --git a/hw/virtio.h b/hw/virtio.h index 5836ab61e7..a60d7355de 100644 --- a/hw/virtio.h +++ b/hw/virtio.h @@ -185,7 +185,9 @@ void virtio_bind_device(VirtIODevice *vdev, const VirtIOBindings *binding, /* Base devices. */ VirtIODevice *virtio_blk_init(DeviceState *dev, BlockConf *conf); -VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf); +struct virtio_net_conf; +VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf, + struct virtio_net_conf *net); VirtIODevice *virtio_serial_init(DeviceState *dev, uint32_t max_nr_ports); VirtIODevice *virtio_balloon_init(DeviceState *dev); #ifdef CONFIG_LINUX diff --git a/net/tap-aix.c b/net/tap-aix.c index 4bc9f2f6d2..e19aaba110 100644 --- a/net/tap-aix.c +++ b/net/tap-aix.c @@ -46,6 +46,15 @@ int tap_probe_has_ufo(int fd) return 0; } +int tap_probe_vnet_hdr_len(int fd, int len) +{ + return 0; +} + +void tap_fd_set_vnet_hdr_len(int fd, int len) +{ +} + void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo) { diff --git a/net/tap-bsd.c b/net/tap-bsd.c index 3773d5da5e..3513075337 100644 --- a/net/tap-bsd.c +++ b/net/tap-bsd.c @@ -116,6 +116,15 @@ int tap_probe_has_ufo(int fd) return 0; } +int tap_probe_vnet_hdr_len(int fd, int len) +{ + return 0; +} + +void tap_fd_set_vnet_hdr_len(int fd, int len) +{ +} + void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo) { diff --git a/net/tap-linux.c b/net/tap-linux.c index c92983cb53..f7aa9041b8 100644 --- a/net/tap-linux.c +++ b/net/tap-linux.c @@ -129,6 +129,35 @@ int tap_probe_has_ufo(int fd) return 1; } +/* Verify that we can assign given length */ +int tap_probe_vnet_hdr_len(int fd, int len) +{ + int orig; + if (ioctl(fd, TUNGETVNETHDRSZ, &orig) == -1) { + return 0; + } + if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) { + return 0; + } + /* Restore original length: we can't handle failure. */ + if (ioctl(fd, TUNSETVNETHDRSZ, &orig) == -1) { + fprintf(stderr, "TUNGETVNETHDRSZ ioctl() failed: %s. Exiting.\n", + strerror(errno)); + assert(0); + return -errno; + } + return 1; +} + +void tap_fd_set_vnet_hdr_len(int fd, int len) +{ + if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) { + fprintf(stderr, "TUNSETVNETHDRSZ ioctl() failed: %s. Exiting.\n", + strerror(errno)); + assert(0); + } +} + void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo) { diff --git a/net/tap-linux.h b/net/tap-linux.h index 9f943589bf..659e98122b 100644 --- a/net/tap-linux.h +++ b/net/tap-linux.h @@ -27,6 +27,8 @@ #define TUNSETOFFLOAD _IOW('T', 208, unsigned int) #define TUNGETIFF _IOR('T', 210, unsigned int) #define TUNSETSNDBUF _IOW('T', 212, int) +#define TUNGETVNETHDRSZ _IOR('T', 215, int) +#define TUNSETVNETHDRSZ _IOW('T', 216, int) #endif @@ -52,4 +54,10 @@ struct virtio_net_hdr uint16_t csum_offset; }; +struct virtio_net_hdr_mrg_rxbuf +{ + struct virtio_net_hdr hdr; + uint16_t num_buffers; /* Number of merged rx buffers */ +}; + #endif /* QEMU_TAP_H */ diff --git a/net/tap-solaris.c b/net/tap-solaris.c index 4ac61d2ad6..c216d28267 100644 --- a/net/tap-solaris.c +++ b/net/tap-solaris.c @@ -212,6 +212,15 @@ int tap_probe_has_ufo(int fd) return 0; } +int tap_probe_vnet_hdr_len(int fd, int len) +{ + return 0; +} + +void tap_fd_set_vnet_hdr_len(int fd, int len) +{ +} + void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo) { diff --git a/net/tap-win32.c b/net/tap-win32.c index 74348daf5a..9fe4fcd5f4 100644 --- a/net/tap-win32.c +++ b/net/tap-win32.c @@ -728,6 +728,15 @@ int tap_has_vnet_hdr(VLANClientState *vc) return 0; } +int tap_probe_vnet_hdr_len(int fd, int len) +{ + return 0; +} + +void tap_fd_set_vnet_hdr_len(int fd, int len) +{ +} + void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr) { } diff --git a/net/tap.c b/net/tap.c index 0147dabc15..4afb314fde 100644 --- a/net/tap.c +++ b/net/tap.c @@ -57,10 +57,10 @@ typedef struct TAPState { uint8_t buf[TAP_BUFSIZE]; unsigned int read_poll : 1; unsigned int write_poll : 1; - unsigned int has_vnet_hdr : 1; unsigned int using_vnet_hdr : 1; unsigned int has_ufo: 1; VHostNetState *vhost_net; + unsigned host_vnet_hdr_len; } TAPState; static int launch_script(const char *setup_script, const char *ifname, int fd); @@ -121,11 +121,11 @@ static ssize_t tap_receive_iov(VLANClientState *nc, const struct iovec *iov, TAPState *s = DO_UPCAST(TAPState, nc, nc); const struct iovec *iovp = iov; struct iovec iov_copy[iovcnt + 1]; - struct virtio_net_hdr hdr = { 0, }; + struct virtio_net_hdr_mrg_rxbuf hdr = { }; - if (s->has_vnet_hdr && !s->using_vnet_hdr) { + if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { iov_copy[0].iov_base = &hdr; - iov_copy[0].iov_len = sizeof(hdr); + iov_copy[0].iov_len = s->host_vnet_hdr_len; memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov)); iovp = iov_copy; iovcnt++; @@ -139,11 +139,11 @@ static ssize_t tap_receive_raw(VLANClientState *nc, const uint8_t *buf, size_t s TAPState *s = DO_UPCAST(TAPState, nc, nc); struct iovec iov[2]; int iovcnt = 0; - struct virtio_net_hdr hdr = { 0, }; + struct virtio_net_hdr_mrg_rxbuf hdr = { }; - if (s->has_vnet_hdr) { + if (s->host_vnet_hdr_len) { iov[iovcnt].iov_base = &hdr; - iov[iovcnt].iov_len = sizeof(hdr); + iov[iovcnt].iov_len = s->host_vnet_hdr_len; iovcnt++; } @@ -159,7 +159,7 @@ static ssize_t tap_receive(VLANClientState *nc, const uint8_t *buf, size_t size) TAPState *s = DO_UPCAST(TAPState, nc, nc); struct iovec iov[1]; - if (s->has_vnet_hdr && !s->using_vnet_hdr) { + if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { return tap_receive_raw(nc, buf, size); } @@ -202,9 +202,9 @@ static void tap_send(void *opaque) break; } - if (s->has_vnet_hdr && !s->using_vnet_hdr) { - buf += sizeof(struct virtio_net_hdr); - size -= sizeof(struct virtio_net_hdr); + if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { + buf += s->host_vnet_hdr_len; + size -= s->host_vnet_hdr_len; } size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed); @@ -229,7 +229,28 @@ int tap_has_vnet_hdr(VLANClientState *nc) assert(nc->info->type == NET_CLIENT_TYPE_TAP); - return s->has_vnet_hdr; + return !!s->host_vnet_hdr_len; +} + +int tap_has_vnet_hdr_len(VLANClientState *nc, int len) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + assert(nc->info->type == NET_CLIENT_TYPE_TAP); + + return tap_probe_vnet_hdr_len(s->fd, len); +} + +void tap_set_vnet_hdr_len(VLANClientState *nc, int len) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + assert(nc->info->type == NET_CLIENT_TYPE_TAP); + assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || + len == sizeof(struct virtio_net_hdr)); + + tap_fd_set_vnet_hdr_len(s->fd, len); + s->host_vnet_hdr_len = len; } void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr) @@ -239,7 +260,7 @@ void tap_using_vnet_hdr(VLANClientState *nc, int using_vnet_hdr) using_vnet_hdr = using_vnet_hdr != 0; assert(nc->info->type == NET_CLIENT_TYPE_TAP); - assert(s->has_vnet_hdr == using_vnet_hdr); + assert(!!s->host_vnet_hdr_len == using_vnet_hdr); s->using_vnet_hdr = using_vnet_hdr; } @@ -310,7 +331,7 @@ static TAPState *net_tap_fd_init(VLANState *vlan, s = DO_UPCAST(TAPState, nc, nc); s->fd = fd; - s->has_vnet_hdr = vnet_hdr != 0; + s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0; s->using_vnet_hdr = 0; s->has_ufo = tap_probe_has_ufo(s->fd); tap_set_offload(&s->nc, 0, 0, 0, 0, 0); diff --git a/net/tap.h b/net/tap.h index b8cec8320c..e44bd2bc5f 100644 --- a/net/tap.h +++ b/net/tap.h @@ -40,13 +40,17 @@ ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen); int tap_has_ufo(VLANClientState *vc); int tap_has_vnet_hdr(VLANClientState *vc); +int tap_has_vnet_hdr_len(VLANClientState *vc, int len); void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr); void tap_set_offload(VLANClientState *vc, int csum, int tso4, int tso6, int ecn, int ufo); +void tap_set_vnet_hdr_len(VLANClientState *vc, int len); int tap_set_sndbuf(int fd, QemuOpts *opts); int tap_probe_vnet_hdr(int fd); +int tap_probe_vnet_hdr_len(int fd, int len); int tap_probe_has_ufo(int fd); void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo); +void tap_fd_set_vnet_hdr_len(int fd, int len); int tap_get_fd(VLANClientState *vc);