2008-12-17 19:13:11 +00:00
|
|
|
/*
|
|
|
|
* Virtio Network Device
|
|
|
|
*
|
|
|
|
* Copyright IBM, Corp. 2007
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Anthony Liguori <aliguori@us.ibm.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See
|
|
|
|
* the COPYING file in the top-level directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2016-01-26 18:17:07 +00:00
|
|
|
#include "qemu/osdep.h"
|
2019-10-29 12:49:04 +01:00
|
|
|
#include "qemu/atomic.h"
|
2012-12-17 18:20:00 +01:00
|
|
|
#include "qemu/iov.h"
|
Include qemu/main-loop.h less
In my "build everything" tree, changing qemu/main-loop.h triggers a
recompile of some 5600 out of 6600 objects (not counting tests and
objects that don't depend on qemu/osdep.h). It includes block/aio.h,
which in turn includes qemu/event_notifier.h, qemu/notify.h,
qemu/processor.h, qemu/qsp.h, qemu/queue.h, qemu/thread-posix.h,
qemu/thread.h, qemu/timer.h, and a few more.
Include qemu/main-loop.h only where it's needed. Touching it now
recompiles only some 1700 objects. For block/aio.h and
qemu/event_notifier.h, these numbers drop from 5600 to 2800. For the
others, they shrink only slightly.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20190812052359.30071-21-armbru@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
2019-08-12 07:23:50 +02:00
|
|
|
#include "qemu/main-loop.h"
|
2019-05-23 16:35:07 +02:00
|
|
|
#include "qemu/module.h"
|
2013-02-05 17:06:20 +01:00
|
|
|
#include "hw/virtio/virtio.h"
|
2012-10-24 08:43:34 +02:00
|
|
|
#include "net/net.h"
|
2009-10-22 17:49:03 +01:00
|
|
|
#include "net/checksum.h"
|
2009-10-22 17:49:05 +01:00
|
|
|
#include "net/tap.h"
|
2012-12-17 18:20:00 +01:00
|
|
|
#include "qemu/error-report.h"
|
|
|
|
#include "qemu/timer.h"
|
2019-10-29 12:49:04 +01:00
|
|
|
#include "qemu/option.h"
|
|
|
|
#include "qemu/option_int.h"
|
|
|
|
#include "qemu/config-file.h"
|
|
|
|
#include "qapi/qmp/qdict.h"
|
2013-02-05 17:06:20 +01:00
|
|
|
#include "hw/virtio/virtio-net.h"
|
|
|
|
#include "net/vhost_net.h"
|
2019-02-27 13:24:07 +00:00
|
|
|
#include "net/announce.h"
|
2013-04-11 16:29:57 +02:00
|
|
|
#include "hw/virtio/virtio-bus.h"
|
2018-02-01 12:18:31 +01:00
|
|
|
#include "qapi/error.h"
|
2018-02-11 10:36:01 +01:00
|
|
|
#include "qapi/qapi-events-net.h"
|
2019-08-12 07:23:51 +02:00
|
|
|
#include "hw/qdev-properties.h"
|
2019-10-29 12:49:04 +01:00
|
|
|
#include "qapi/qapi-types-migration.h"
|
|
|
|
#include "qapi/qapi-events-migration.h"
|
2014-06-24 19:42:54 +02:00
|
|
|
#include "hw/virtio/virtio-access.h"
|
2017-04-21 17:39:30 +02:00
|
|
|
#include "migration/misc.h"
|
2018-03-07 22:25:41 -05:00
|
|
|
#include "standard-headers/linux/ethtool.h"
|
2019-08-12 07:23:58 +02:00
|
|
|
#include "sysemu/sysemu.h"
|
2019-02-27 13:24:07 +00:00
|
|
|
#include "trace.h"
|
2019-10-29 12:49:04 +01:00
|
|
|
#include "monitor/qdev.h"
|
|
|
|
#include "hw/pci/pci.h"
|
2020-05-08 15:59:29 +03:00
|
|
|
#include "net_rx_pkt.h"
|
2020-07-01 22:55:37 +08:00
|
|
|
#include "hw/virtio/vhost.h"
|
2008-12-17 19:13:11 +00:00
|
|
|
|
2009-10-22 17:43:50 +01:00
|
|
|
/* Device state version number used by this file. */
#define VIRTIO_NET_VM_VERSION    11

/* Sizing of the receive-filter tables. */
#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* Default virtqueue sizes; these used to be fixed values. */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/*
 * Smallest accepted queue sizes.  For now only larger queues are allowed;
 * with virtio-1 the guest can downsize.
 */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
|
2016-08-10 17:47:16 +03:00
|
|
|
|
virtio-net: support RSC v4/v6 tcp traffic for Windows HCK
This commit adds implementation of RX packets
coalescing, compatible with requirements of Windows
Hardware compatibility kit.
The device enables feature VIRTIO_NET_F_RSC_EXT in
host features if it supports extended RSC functionality
as defined in the specification.
This feature requires at least one of VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6. Windows guest driver acks
this feature only if VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
is also present.
If the guest driver acks the VIRTIO_NET_F_RSC_EXT feature,
the device coalesces TCPv4 and TCPv6 packets (if the
respective VIRTIO_NET_F_GUEST_TSO feature is on),
populates extended RSC information in the virtio header
and sets the VIRTIO_NET_HDR_F_RSC_INFO bit in the header flags.
The device does not recalculate checksums in the coalesced
packet, so they are not valid.
In this case:
All the data packets in a tcp connection are cached
to a single buffer in every receive interval, and will
be sent out via a timer, the 'virtio_net_rsc_timeout'
controls the interval, this value may impact the
performance and response time of tcp connection,
50000(50us) is an experience value to gain a performance
improvement, since the whql test sends packets every 100us,
so '300000(300us)' passes the test case, it is the default
value as well, tune it via the command line parameter
'rsc_interval' within 'virtio-net-pci' device, for example,
to launch a guest with interval set as '500000':
'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,
guest_rsc_ext=on,rsc_interval=500000'
The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.
'NetRscChain' is used to save the segments of IPv4/6 in a
VirtIONet device.
A new segment becomes a 'Candidate' as well as it passed sanity check,
the main handler of TCP includes TCP window update, duplicated
ACK check and the real data coalescing.
An 'Candidate' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.
Sanity check includes:
1. Incorrect version in IP header
2. An IP options or IP fragment
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.
5. An ECN packet
Even so, there might be more cases that should be considered, such as
the IP identification field and other flags, but checking them breaks the
test because Windows sets the identification to the same value even when
the packet is not a fragment.
Normally it includes 2 typical ways to handle a TCP control flag,
'bypass' and 'finalize', 'bypass' means should be sent out directly,
while 'finalize' means the packets should also be bypassed, but this
should be done after search for the same connection packets in the
pool and drain all of them out, this is to avoid out of order fragment.
All 'SYN' packets will be bypassed since a 'SYN' always begins a new
connection. Other flags such as 'URG/FIN/RST/CWR/ECE' will trigger a
finalization, because they normally appear when a connection is about
to be closed; an 'URG' packet also finalizes the current coalescing unit.
Statistics can be used to monitor the basic coalescing status, the
'out of order' and 'out of window' means how many retransmitting packets,
thus describe the performance intuitively.
Difference between ip v4 and v6 processing:
Fragment length in ipv4 header includes itself, while it's not
included for ipv6, thus means ipv6 can carry a real 65535 payload.
Note that main goal of implementing this feature in software
is to create reference setup for certification tests. In such
setups guest migration is not required, so the coalesced packets
not yet delivered to the guest will be lost in case of migration.
Signed-off-by: Wei Xu <wexu@redhat.com>
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-01-03 15:12:29 +02:00
|
|
|
#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

/* Masks applied to TCP header fields by the RSC code. */
#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Interval of the timer that purges coalesced packets.  This value affects
 * performance a lot and should be tuned carefully: '300000' (300us) is the
 * recommended value to pass the WHQL test, while '50000' can gain 2x netperf
 * throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
|
|
|
|
|
2020-05-08 15:59:28 +03:00
|
|
|
/*
 * Bitmask of hash types written to the supported_hash_types field of the
 * device config space.
 */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES \
    (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
     VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
     VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
     VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
     VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
     VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
     VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
     VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
     VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
|
|
|
|
|
2021-05-11 12:41:57 +02:00
|
|
|
static const VirtIOFeature feature_sizes[] = {
|
2018-03-07 22:25:40 -05:00
|
|
|
{.flags = 1ULL << VIRTIO_NET_F_MAC,
|
2019-10-11 17:27:59 +02:00
|
|
|
.end = endof(struct virtio_net_config, mac)},
|
2018-03-07 22:25:40 -05:00
|
|
|
{.flags = 1ULL << VIRTIO_NET_F_STATUS,
|
2019-10-11 17:27:59 +02:00
|
|
|
.end = endof(struct virtio_net_config, status)},
|
2018-03-07 22:25:40 -05:00
|
|
|
{.flags = 1ULL << VIRTIO_NET_F_MQ,
|
2019-10-11 17:27:59 +02:00
|
|
|
.end = endof(struct virtio_net_config, max_virtqueue_pairs)},
|
2018-03-07 22:25:40 -05:00
|
|
|
{.flags = 1ULL << VIRTIO_NET_F_MTU,
|
2019-10-11 17:27:59 +02:00
|
|
|
.end = endof(struct virtio_net_config, mtu)},
|
2018-03-07 22:25:41 -05:00
|
|
|
{.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
|
2019-10-11 17:27:59 +02:00
|
|
|
.end = endof(struct virtio_net_config, duplex)},
|
2020-05-08 15:59:31 +03:00
|
|
|
{.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
|
2020-05-08 15:59:28 +03:00
|
|
|
.end = endof(struct virtio_net_config, supported_hash_types)},
|
2013-02-05 17:47:16 -06:00
|
|
|
{}
|
|
|
|
};
|
|
|
|
|
2013-01-30 19:12:39 +08:00
|
|
|
/*
 * Return the device's per-queue state that corresponds to net client @nc,
 * selected by the client's queue_index.
 */
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *dev = qemu_get_nic_opaque(nc);
    int idx = nc->queue_index;

    return &dev->vqs[idx];
}
|
2013-01-30 19:12:39 +08:00
|
|
|
|
|
|
|
/*
 * Convert a virtqueue index into a queue index: two consecutive
 * virtqueue indices map to the same queue index.
 */
static int vq2q(int queue_index)
{
    const int vqs_per_queue = 2;

    return queue_index / vqs_per_queue;
}
|
|
|
|
|
2008-12-17 19:13:11 +00:00
|
|
|
/* TODO
|
|
|
|
* - we could suppress RX interrupt if we were so inclined.
|
|
|
|
*/
|
|
|
|
|
2009-02-05 22:36:08 +00:00
|
|
|
/*
 * Fill @config with the first n->config_size bytes of the device's
 * virtio_net_config.
 *
 * The config is first built from the VirtIONet state.  If the peer is a
 * vhost-vdpa backend, the config is then fetched from the backend and, when
 * that succeeds, copied over @config again.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    static const MACAddr zero_mac = { .a = { 0, 0, 0, 0, 0, 0 } };
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    struct virtio_net_config cfg;
    uint16_t rss_table_len;
    int rc;

    /* Clear the whole structure, including fields not set below. */
    memset(&cfg, 0, sizeof(cfg));

    virtio_stw_p(vdev, &cfg.status, n->status);
    virtio_stw_p(vdev, &cfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &cfg.mtu, n->net_conf.mtu);
    memcpy(cfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &cfg.speed, n->net_conf.speed);
    cfg.duplex = n->net_conf.duplex;
    cfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;

    /* Without host RSS support the indirection table has a single entry. */
    if (virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        rss_table_len = VIRTIO_NET_RSS_MAX_TABLE_LEN;
    } else {
        rss_table_len = 1;
    }
    virtio_stw_p(vdev, &cfg.rss_max_indirection_table_length, rss_table_len);
    virtio_stl_p(vdev, &cfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);

    memcpy(config, &cfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (!nc->peer || nc->peer->info->type != NET_CLIENT_DRIVER_VHOST_VDPA) {
        return;
    }

    rc = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&cfg,
                              n->config_size);
    if (rc == -1) {
        /* Keep the config built from the device state above. */
        return;
    }

    /*
     * Some NIC/kernel combinations present 0 as the mac address. As
     * that is not a legal address, try to proceed with the
     * address from the QEMU command line in the hope that the
     * address has been configured correctly elsewhere - just not
     * reported by the device.
     */
    if (memcmp(&cfg.mac, &zero_mac, sizeof(zero_mac)) == 0) {
        info_report("Zero hardware mac address detected. Ignoring.");
        memcpy(cfg.mac, n->mac, ETH_ALEN);
    }

    memcpy(config, &cfg, n->config_size);
}
|
|
|
|
|
2009-02-05 22:36:08 +00:00
|
|
|
/*
 * Handle a write of @config to the device config space.
 *
 * The MAC address in @config is adopted only when neither
 * VIRTIO_NET_F_CTRL_MAC_ADDR nor VIRTIO_F_VERSION_1 is set on @vdev and the
 * address differs from the current one.  For a vhost-vdpa peer the config
 * is also forwarded to the backend.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config incoming = {};
    NetClientState *nc = qemu_get_queue(n->nic);
    bool mac_writable;

    memcpy(&incoming, config, n->config_size);

    mac_writable =
        !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1);

    if (mac_writable && memcmp(incoming.mac, n->mac, ETH_ALEN) != 0) {
        memcpy(n->mac, incoming.mac, ETH_ALEN);
        qemu_format_nic_info_str(nc, n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (!nc->peer) {
        return;
    }
    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&incoming, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}
|
|
|
|
|
2010-11-22 19:52:30 +02:00
|
|
|
/*
 * Tell whether the device counts as started for the given @status:
 * DRIVER_OK must be set in @status, the link must be up in n->status and
 * the VM must be running.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }
    if (!(n->status & VIRTIO_NET_S_LINK_UP)) {
        return false;
    }
    return vdev->vm_running ? true : false;
}
|
|
|
|
|
2019-02-27 13:24:10 +00:00
|
|
|
/*
 * Set the ANNOUNCE bit in the device status and raise a config change
 * notification.
 */
static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *virtio_dev = VIRTIO_DEVICE(net);

    trace_virtio_net_announce_notify();

    net->status = net->status | VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(virtio_dev);
}
|
|
|
|
|
2014-05-20 14:01:44 +08:00
|
|
|
static void virtio_net_announce_timer(void *opaque)
|
|
|
|
{
|
|
|
|
VirtIONet *n = opaque;
|
2019-02-27 13:24:07 +00:00
|
|
|
trace_virtio_net_announce_timer(n->announce_timer.round);
|
2014-05-20 14:01:44 +08:00
|
|
|
|
2019-02-27 13:24:07 +00:00
|
|
|
n->announce_timer.round--;
|
2019-02-27 13:24:10 +00:00
|
|
|
virtio_net_announce_notify(n);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Send an announce notification for the NIC behind @nc, provided the
 * announce timer has no rounds left and both VIRTIO_NET_F_GUEST_ANNOUNCE
 * and VIRTIO_NET_F_CTRL_VQ are set on the device.
 */
static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *net = qemu_get_nic_opaque(nc);
    VirtIODevice *virtio_dev = VIRTIO_DEVICE(net);

    /*
     * Make sure the virtio migration announcement timer isn't running
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (net->announce_timer.round != 0) {
        return;
    }

    if (!virtio_vdev_has_feature(virtio_dev, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return;
    }
    if (!virtio_vdev_has_feature(virtio_dev, VIRTIO_NET_F_CTRL_VQ)) {
        return;
    }

    virtio_net_announce_notify(net);
}
|
|
|
|
|
2010-11-22 19:52:30 +02:00
|
|
|
/*
 * Start or stop the vhost backend so that it follows the device state
 * implied by @status.
 *
 * Nothing is done when the peer has no vhost backend, or when vhost is
 * already in the wanted state.  A failed start leaves vhost stopped.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;
    bool want_running;
    int idx;
    int rc;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    want_running = virtio_net_started(n, status) && !nc->peer->link_down;
    if (want_running == !!n->vhost_started) {
        /* Already in the requested state. */
        return;
    }

    if (n->vhost_started) {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
        return;
    }

    /* From here on: vhost is stopped and has to be started. */
    if (n->needs_vnet_hdr_swap) {
        error_report("backend does not support %s vnet headers; "
                     "falling back on userspace virtio",
                     virtio_is_big_endian(vdev) ? "BE" : "LE");
        return;
    }

    /*
     * Any packets outstanding? Purge them to avoid touching rings
     * when vhost is running.
     */
    for (idx = 0; idx < queues; idx++) {
        NetClientState *qnc = qemu_get_subqueue(n->nic, idx);

        /* Purge both directions: TX and RX. */
        qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
        qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
    }

    if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
        rc = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
        if (rc < 0) {
            error_report("%uBytes MTU not supported by the backend",
                         n->net_conf.mtu);

            return;
        }
    }

    /* The flag is set before the start call and reverted on failure. */
    n->vhost_started = 1;
    rc = vhost_net_start(vdev, n->nic->ncs, queues);
    if (rc < 0) {
        error_report("unable to start vhost net: %d: "
                     "falling back on userspace virtio", -rc);
        n->vhost_started = 0;
    }
}
|
|
|
|
|
2016-02-05 11:43:11 +01:00
|
|
|
static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
|
|
|
|
NetClientState *peer,
|
|
|
|
bool enable)
|
|
|
|
{
|
|
|
|
if (virtio_is_big_endian(vdev)) {
|
|
|
|
return qemu_set_vnet_be(peer, enable);
|
|
|
|
} else {
|
|
|
|
return qemu_set_vnet_le(peer, enable);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Apply the vnet header endianness setting to the peers of the first
 * @queues entries of @ncs.
 *
 * Returns true when enabling failed on some peer; in that case the setting
 * is disabled again on the peers that were already handled.  Returns false
 * otherwise.  Failures while disabling are ignored.
 */
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int q;

    for (q = 0; q < queues; q++) {
        int rc = virtio_net_set_vnet_endian_one(vdev, ncs[q].peer, enable);
        int undo;

        if (rc >= 0 || !enable) {
            continue;
        }

        /* Roll back the peers before the failing one, newest first. */
        for (undo = q - 1; undo >= 0; undo--) {
            virtio_net_set_vnet_endian_one(vdev, ncs[undo].peer, false);
        }

        return true;
    }

    return false;
}
|
|
|
|
|
|
|
|
/*
 * Keep the backend's vnet header endianness in step with a transition of
 * the device to the new @status.
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /*
         * Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
        return;
    }

    if (!virtio_net_started(n, vdev->status)) {
        /* Started neither under the new nor the current status. */
        return;
    }

    /*
     * After using the device, we need to reset the network backend to
     * the default (guest native endianness), otherwise the guest may
     * lose network connectivity if it is rebooted into a different
     * endianness.
     */
    virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
}
|
|
|
|
|
2016-12-13 10:12:08 +02:00
|
|
|
/*
 * Drop everything pending on @vq and notify the guest if anything was
 * actually dropped.
 */
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int count = virtqueue_drop_all(vq);

    if (count == 0) {
        return;
    }
    virtio_notify(vdev, vq);
}
|
|
|
|
|
2010-11-22 19:52:30 +02:00
|
|
|
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
|
|
|
|
{
|
2013-04-11 16:30:01 +02:00
|
|
|
VirtIONet *n = VIRTIO_NET(vdev);
|
2013-01-30 19:12:39 +08:00
|
|
|
VirtIONetQueue *q;
|
|
|
|
int i;
|
|
|
|
uint8_t queue_status;
|
2010-11-22 19:52:30 +02:00
|
|
|
|
2016-02-05 11:43:11 +01:00
|
|
|
virtio_net_vnet_endian_status(n, status);
|
2010-11-22 19:52:30 +02:00
|
|
|
virtio_net_vhost_status(n, status);
|
|
|
|
|
2013-01-30 19:12:39 +08:00
|
|
|
for (i = 0; i < n->max_queues; i++) {
|
2015-07-15 11:02:27 +08:00
|
|
|
NetClientState *ncs = qemu_get_subqueue(n->nic, i);
|
|
|
|
bool queue_started;
|
2013-01-30 19:12:39 +08:00
|
|
|
q = &n->vqs[i];
|
2010-11-22 19:52:30 +02:00
|
|
|
|
2013-01-30 19:12:39 +08:00
|
|
|
if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
|
|
|
|
queue_status = 0;
|
2010-11-22 19:52:30 +02:00
|
|
|
} else {
|
2013-01-30 19:12:39 +08:00
|
|
|
queue_status = status;
|
2010-11-22 19:52:30 +02:00
|
|
|
}
|
2015-07-15 11:02:27 +08:00
|
|
|
queue_started =
|
|
|
|
virtio_net_started(n, queue_status) && !n->vhost_started;
|
|
|
|
|
|
|
|
if (queue_started) {
|
|
|
|
qemu_flush_queued_packets(ncs);
|
|
|
|
}
|
2013-01-30 19:12:39 +08:00
|
|
|
|
|
|
|
if (!q->tx_waiting) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-07-15 11:02:27 +08:00
|
|
|
if (queue_started) {
|
2013-01-30 19:12:39 +08:00
|
|
|
if (q->tx_timer) {
|
2013-08-21 16:03:08 +01:00
|
|
|
timer_mod(q->tx_timer,
|
|
|
|
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
|
2013-01-30 19:12:39 +08:00
|
|
|
} else {
|
|
|
|
qemu_bh_schedule(q->tx_bh);
|
|
|
|
}
|
2010-11-22 19:52:30 +02:00
|
|
|
} else {
|
2013-01-30 19:12:39 +08:00
|
|
|
if (q->tx_timer) {
|
2013-08-21 16:03:08 +01:00
|
|
|
timer_del(q->tx_timer);
|
2013-01-30 19:12:39 +08:00
|
|
|
} else {
|
|
|
|
qemu_bh_cancel(q->tx_bh);
|
|
|
|
}
|
2016-12-13 10:12:08 +02:00
|
|
|
if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
|
2017-11-22 17:57:19 +08:00
|
|
|
(queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
|
|
|
|
vdev->vm_running) {
|
2016-12-13 10:12:08 +02:00
|
|
|
/* if tx is waiting we are likely have some packets in tx queue
|
|
|
|
* and disabled notification */
|
|
|
|
q->tx_waiting = 0;
|
|
|
|
virtio_queue_set_notification(q->tx_vq, 1);
|
|
|
|
virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
|
|
|
|
}
|
2010-11-22 19:52:30 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-07-24 16:35:13 +01:00
|
|
|
/*
 * Mirror the link state of @nc into the LINK_UP bit of the device status,
 * notify the guest if the status changed, and re-run the status handler
 * with the current device status.
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t previous = n->status;
    bool changed;

    if (!nc->link_down) {
        n->status |= VIRTIO_NET_S_LINK_UP;
    } else {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    }

    changed = (n->status != previous);
    if (changed) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}
|
|
|
|
|
net: add support of mac-programming over macvtap in QEMU side
Currently macvtap based macvlan device is working in promiscuous
mode, we want to implement mac-programming over macvtap through
Libvirt for better performance.
Design:
QEMU notifies Libvirt when rx-filter config is changed in guest,
then Libvirt query the rx-filter information by a monitor command,
and sync the change to macvtap device. Related rx-filter config
of the nic contains main mac, rx-mode items and vlan table.
This patch adds a QMP event to notify management of rx-filter change,
and adds a monitor command for management to query rx-filter
information.
Test:
If we repeatedly add/remove vlan, and change macaddr of vlan
interfaces in guest by a loop script.
Result:
The events will flood the QMP client(management), management takes
too much resource to process the events.
The event_throttle API (rate set to 1 ms) can keep the events from flooding
the QMP client, but it could cause an unexpected delay (~1 ms), whereas
guests normally expect rx-filter updates immediately.
So we use a flag for each nic to avoid events flooding, the event
is emitted once until the query command is executed. The flag
implementation could not introduce unexpected delay.
There maybe exist an uncontrollable delay if we let Libvirt do the
real change, guests normally expect rx-filter updates immediately.
But it's another separate issue, we can investigate it when the
work in Libvirt side is done.
Michael S. Tsirkin: tweaked to enable events on start
Michael S. Tsirkin: fixed not to crash when no id
Michael S. Tsirkin: fold in patch:
"additional fixes for mac-programming feature"
Amos Kong: always notify QMP client if mactable is changed
Amos Kong: return NULL list if no net client supports rx-filter query
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-14 15:45:52 +08:00
|
|
|
/*
 * Emit a NIC_RX_FILTER_CHANGED QAPI event for the NIC behind @nc, unless
 * notifications are currently disabled on @nc.  Sending the event disables
 * further notifications.
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *net = qemu_get_nic_opaque(nc);
    char *qom_path;

    if (!nc->rxfilter_notify_enabled) {
        return;
    }

    qom_path = object_get_canonical_path(OBJECT(net->qdev));
    qapi_event_send_nic_rx_filter_changed(!!net->netclient_name,
                                          net->netclient_name, qom_path);
    g_free(qom_path);

    /* disable event notification to avoid events flooding */
    nc->rxfilter_notify_enabled = 0;
}
|
|
|
|
|
2014-03-26 08:19:43 +08:00
|
|
|
/*
 * Build a list of the VLAN ids whose bit is set in n->vlans.
 *
 * The bitmap is scanned 32 bits at a time and every id found is prepended
 * to the list.  Returns NULL when no bit is set.
 */
static intList *get_vlan_table(VirtIONet *n)
{
    intList *vlan_ids = NULL;
    int word, bit;

    for (word = 0; word < MAX_VLAN / 32; word++) {
        if (!n->vlans[word]) {
            /* No id set in this group of 32. */
            continue;
        }

        for (bit = 0; bit < 32; bit++) {
            if (n->vlans[word] & (1U << bit)) {
                QAPI_LIST_PREPEND(vlan_ids, word * 32 + bit);
            }
        }
    }

    return vlan_ids;
}
|
|
|
|
|
net: add support of mac-programming over macvtap in QEMU side
Currently macvtap based macvlan device is working in promiscuous
mode, we want to implement mac-programming over macvtap through
Libvirt for better performance.
Design:
QEMU notifies Libvirt when rx-filter config is changed in guest,
then Libvirt query the rx-filter information by a monitor command,
and sync the change to macvtap device. Related rx-filter config
of the nic contains main mac, rx-mode items and vlan table.
This patch adds a QMP event to notify management of rx-filter change,
and adds a monitor command for management to query rx-filter
information.
Test:
If we repeatedly add/remove vlan, and change macaddr of vlan
interfaces in guest by a loop script.
Result:
The events will flood the QMP client(management), management takes
too much resource to process the events.
The event_throttle API (rate set to 1 ms) can keep the events from flooding
the QMP client, but it could cause an unexpected delay (~1 ms), whereas
guests normally expect rx-filter updates immediately.
So we use a flag for each nic to avoid events flooding, the event
is emitted once until the query command is executed. The flag
implementation could not introduce unexpected delay.
There maybe exist an uncontrollable delay if we let Libvirt do the
real change, guests normally expect rx-filter updates immediately.
But it's another separate issue, we can investigate it when the
work in Libvirt side is done.
Michael S. Tsirkin: tweaked to enable events on start
Michael S. Tsirkin: fixed not to crash when no id
Michael S. Tsirkin: fold in patch:
"additional fixes for mac-programming feature"
Amos Kong: always notify QMP client if mactable is changed
Amos Kong: return NULL list if no net client supports rx-filter query
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-14 15:45:52 +08:00
|
|
|
/*
 * Build the RxFilterInfo describing the current receive-filter state of
 * the virtio-net device behind @nc.
 *
 * The result is allocated here with g_malloc0() and filled from the
 * device's rx-mode flags, its main MAC, the unicast and multicast parts
 * of the MAC table, and the VLAN table produced by get_vlan_table().
 *
 * Side effect: sets nc->rxfilter_notify_enabled so that rx-filter change
 * notification is enabled again after the query.
 *
 * Returns the newly allocated RxFilterInfo.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /*
     * nobcast is a "reject" flag, like nouni/nomulti above (which map to
     * RX_STATE_NONE), so "broadcast allowed" is its negation.
     */
    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* Entries [0, first_multi) of the MAC table are reported as unicast. */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs +
                                                 i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    /* Entries [first_multi, in_use) are reported as multicast. */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                          qemu_mac_strdup_printf(n->mac_table.macs +
                                                 i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    /*
     * Without VIRTIO_NET_F_CTRL_VLAN the state is reported as "all";
     * otherwise an empty VLAN table is reported as "none".
     */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
|
|
|
|
|
2009-02-05 22:36:20 +00:00
|
|
|
/*
 * Device reset handler: put the rx-mode flags back into compatibility
 * mode, fall back to a single queue, stop the announce timer, wipe the
 * MAC/VLAN filter tables, restore the configured MAC address and make
 * sure no asynchronous TX survives the reset.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue;

    /* Compatibility mode: promiscuous, every other rx-mode flag cleared */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;

    /* Multiqueue is off by default */
    n->curr_queues = 1;

    /* Cancel any announcement in progress */
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Drop all MAC filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Go back to the MAC address from the NIC configuration */
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);

    /* Drop all VLAN filter state */
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush or purge any async TX still queued on each peer */
    for (queue = 0; queue < n->max_queues; queue++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, queue);

        if (!nc->peer) {
            continue;
        }

        qemu_flush_or_purge_queued_packets(nc->peer, true);
        /* Nothing may be left in flight once the queue has been purged */
        assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
    }
}
|
|
|
|
|
2012-09-24 17:04:21 +02:00
|
|
|
/*
 * Record in n->has_vnet_hdr whether the peer of the first queue reports
 * vnet header support.  Leaves the field untouched when there is no peer.
 */
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *peer = qemu_get_queue(n->nic)->peer;

    if (peer) {
        n->has_vnet_hdr = qemu_has_vnet_hdr(peer);
    }
}
|
2009-10-22 17:43:45 +01:00
|
|
|
|
2012-09-24 17:04:21 +02:00
|
|
|
/*
 * Return the cached n->has_vnet_hdr value (written by
 * peer_test_vnet_hdr()).
 */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    int has_hdr = n->has_vnet_hdr;

    return has_hdr;
}
|
|
|
|
|
2009-10-22 17:43:50 +01:00
|
|
|
/*
 * Query the first queue's peer for UFO support, cache the answer in
 * n->has_ufo and return it.  Returns 0 without querying (and without
 * touching n->has_ufo) when the peer has no vnet header.
 */
static int peer_has_ufo(VirtIONet *n)
{
    NetClientState *peer;

    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    peer = qemu_get_queue(n->nic)->peer;
    n->has_ufo = qemu_has_ufo(peer);

    return n->has_ufo;
}
|
|
|
|
|
2015-06-04 12:34:17 +02:00
|
|
|
/*
 * Store the mergeable-rx-buffers setting and derive the guest header
 * length from it:
 *   - @version_1 set:  struct virtio_net_hdr_v1_hash if @hash_report,
 *                      else struct virtio_net_hdr_mrg_rxbuf; also updates
 *                      n->rss_data.populate_hash.
 *   - otherwise:       struct virtio_net_hdr_mrg_rxbuf if mergeable rx
 *                      buffers are on, else struct virtio_net_hdr.
 *
 * Then, for every queue whose peer accepts that header length (and only
 * if the peer has a vnet header at all), program the length into the
 * peer and mirror it into n->host_hdr_len.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    NetClientState *nc;
    int q;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        if (hash_report) {
            n->guest_hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
        } else {
            n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        }
        n->rss_data.populate_hash = !!hash_report;
    } else if (n->mergeable_rx_bufs) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr);
    }

    for (q = 0; q < n->max_queues; q++) {
        nc = qemu_get_subqueue(n->nic, q);

        if (!peer_has_vnet_hdr(n)) {
            continue;
        }
        if (!qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            continue;
        }

        qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
        n->host_hdr_len = n->guest_hdr_len;
    }
}
|
|
|
|
|
2017-07-03 22:25:24 +03:00
|
|
|
/*
 * Return the largest TX queue size usable with the configured backend:
 * VIRTQUEUE_MAX_SIZE when the first configured peer is vhost-user,
 * VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE otherwise (including when there is
 * no peer).
 */
static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (peer && peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTQUEUE_MAX_SIZE;
    }

    return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
}
|
|
|
|
|
2013-01-30 19:12:39 +08:00
|
|
|
/*
 * Enable the backend side of queue @index.
 *
 * A vhost-user peer gets its vring enabled.  A tap peer is enabled via
 * tap_enable(), unless the device has only one queue.  Any other peer
 * type, or a missing peer, needs nothing.
 *
 * Returns 0 when nothing had to be done, otherwise the result of
 * tap_enable().
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *peer = qemu_get_subqueue(n->nic, index)->peer;

    if (!peer) {
        return 0;
    }

    if (peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(peer, 1);
    }

    /* Only tap backends of a multiqueue device need explicit enabling */
    if (peer->info->type != NET_CLIENT_DRIVER_TAP || n->max_queues == 1) {
        return 0;
    }

    return tap_enable(peer);
}
|
|
|
|
|
|
|
|
/*
 * Disable the backend side of queue @index.
 *
 * A vhost-user peer gets its vring disabled.  A tap peer is disabled via
 * tap_disable().  Any other peer type, or a missing peer, needs nothing.
 *
 * Returns 0 when nothing had to be done, otherwise the result of
 * tap_disable().
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *peer = qemu_get_subqueue(n->nic, index)->peer;

    if (!peer) {
        return 0;
    }

    if (peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(peer, 0);
    }

    if (peer->info->type == NET_CLIENT_DRIVER_TAP) {
        return tap_disable(peer);
    }

    return 0;
}
|
|
|
|
|
|
|
|
/*
 * Bring the backend queues in line with the device state: queues with an
 * index below n->curr_queues are attached, every other queue up to
 * n->max_queues is detached.  Nothing is done once the peer has been deleted.
 */
static void virtio_net_set_queues(VirtIONet *n)
{
    int idx;

    if (n->nic->peer_deleted) {
        return;
    }

    for (idx = 0; idx < n->max_queues; idx++) {
        int rc = (idx < n->curr_queues) ? peer_attach(n, idx)
                                        : peer_detach(n, idx);

        /* Both helpers are required to succeed here. */
        assert(!rc);
    }
}
|
|
|
|
|
2013-04-25 15:24:23 +08:00
|
|
|
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
|
2013-01-30 19:12:39 +08:00
|
|
|
|
2015-07-27 17:49:19 +08:00
|
|
|
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
|
|
|
|
Error **errp)
|
2008-12-17 19:13:11 +00:00
|
|
|
{
|
2013-04-11 16:30:01 +02:00
|
|
|
VirtIONet *n = VIRTIO_NET(vdev);
|
2013-01-30 19:12:22 +08:00
|
|
|
NetClientState *nc = qemu_get_queue(n->nic);
|
2008-12-17 19:13:11 +00:00
|
|
|
|
2015-04-28 19:51:12 +08:00
|
|
|
/* Firstly sync all virtio-net possible supported features */
|
|
|
|
features |= n->host_features;
|
|
|
|
|
2014-12-11 14:25:05 +01:00
|
|
|
virtio_add_feature(&features, VIRTIO_NET_F_MAC);
|
2010-01-12 20:50:17 +02:00
|
|
|
|
2012-09-24 17:04:21 +02:00
|
|
|
if (!peer_has_vnet_hdr(n)) {
|
2014-12-11 14:25:05 +01:00
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
|
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
|
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
|
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
|
2010-01-10 13:52:53 +02:00
|
|
|
|
2014-12-11 14:25:05 +01:00
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
|
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
|
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
|
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
|
2020-05-08 15:59:31 +03:00
|
|
|
|
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
|
2010-01-10 13:52:53 +02:00
|
|
|
}
|
2009-10-22 17:43:45 +01:00
|
|
|
|
2010-01-10 13:52:53 +02:00
|
|
|
if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
|
2014-12-11 14:25:05 +01:00
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
|
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
|
2009-10-22 17:43:45 +01:00
|
|
|
}
|
|
|
|
|
2014-05-27 15:05:08 +03:00
|
|
|
if (!get_vhost_net(nc->peer)) {
|
2010-03-17 13:08:42 +02:00
|
|
|
return features;
|
|
|
|
}
|
virtio-net: support RSC v4/v6 tcp traffic for Windows HCK
This commit adds implementation of RX packets
coalescing, compatible with requirements of Windows
Hardware compatibility kit.
The device enables feature VIRTIO_NET_F_RSC_EXT in
host features if it supports extended RSC functionality
as defined in the specification.
This feature requires at least one of VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6. Windows guest driver acks
this feature only if VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
is also present.
If the guest driver acks VIRTIO_NET_F_RSC_EXT feature,
the device coalesces TCPv4 and TCPv6 packets (if
respective VIRTIO_NET_F_GUEST_TSO feature is on,
populates extended RSC information in virtio header
and sets VIRTIO_NET_HDR_F_RSC_INFO bit in header flags.
The device does not recalculate checksums in the coalesced
packet, so they are not valid.
In this case:
All the data packets in a tcp connection are cached
to a single buffer in every receive interval, and will
be sent out via a timer, the 'virtio_net_rsc_timeout'
controls the interval, this value may impact the
performance and response time of tcp connection,
50000(50us) is an experience value to gain a performance
improvement, since the whql test sends packets every 100us,
so '300000(300us)' passes the test case, it is the default
value as well, tune it via the command line parameter
'rsc_interval' within 'virtio-net-pci' device, for example,
to launch a guest with interval set as '500000':
'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,
guest_rsc_ext=on,rsc_interval=500000'
The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.
'NetRscChain' is used to save the segments of IPv4/6 in a
VirtIONet device.
A new segment becomes a 'Candidate' as well as it passed sanity check,
the main handler of TCP includes TCP window update, duplicated
ACK check and the real data coalescing.
An 'Candidate' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.
Sanity check includes:
1. Incorrect version in IP header
2. An IP options or IP fragment
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.
5. An ECN packet
Even though, there might more cases should be considered such as
ip identification other flags, while it breaks the test because
windows set it to the same even it's not a fragment.
Normally it includes 2 typical ways to handle a TCP control flag,
'bypass' and 'finalize', 'bypass' means should be sent out directly,
while 'finalize' means the packets should also be bypassed, but this
should be done after search for the same connection packets in the
pool and drain all of them out, this is to avoid out of order fragment.
All the 'SYN' packets will be bypassed since this always begin a new'
connection, other flags such 'URG/FIN/RST/CWR/ECE' will trigger a
finalization, because this normally happens upon a connection is going
to be closed, an 'URG' packet also finalize current coalescing unit.
Statistics can be used to monitor the basic coalescing status, the
'out of order' and 'out of window' means how many retransmitting packets,
thus describe the performance intuitively.
Difference between ip v4 and v6 processing:
Fragment length in ipv4 header includes itself, while it's not
included for ipv6, thus means ipv6 can carry a real 65535 payload.
Note that main goal of implementing this feature in software
is to create reference setup for certification tests. In such
setups guest migration is not required, so the coalesced packets
not yet delivered to the guest will be lost in case of migration.
Signed-off-by: Wei Xu <wexu@redhat.com>
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-01-03 15:12:29 +02:00
|
|
|
|
2021-05-14 14:48:33 +03:00
|
|
|
if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
|
|
|
|
virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
|
|
|
|
}
|
2017-05-23 14:31:19 +02:00
|
|
|
features = vhost_net_get_features(get_vhost_net(nc->peer), features);
|
|
|
|
vdev->backend_features = features;
|
|
|
|
|
|
|
|
if (n->mtu_bypass_backend &&
|
|
|
|
(n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
|
|
|
|
features |= (1ULL << VIRTIO_NET_F_MTU);
|
|
|
|
}
|
|
|
|
|
|
|
|
return features;
|
2008-12-17 19:13:11 +00:00
|
|
|
}
|
|
|
|
|
2015-06-01 10:45:40 +02:00
|
|
|
/*
 * Return the fixed feature set listed below; @vdev is not consulted.
 */
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    /*
     * Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also the remaining entries of this table.
     */
    static const unsigned int legacy_bits[] = {
        VIRTIO_NET_F_MAC,
        VIRTIO_NET_F_CSUM,
        VIRTIO_NET_F_HOST_TSO4,
        VIRTIO_NET_F_HOST_TSO6,
        VIRTIO_NET_F_HOST_ECN,
    };
    uint64_t features = 0;
    size_t k;

    for (k = 0; k < sizeof(legacy_bits) / sizeof(legacy_bits[0]); k++) {
        virtio_add_feature(&features, legacy_bits[k]);
    }

    return features;
}
|
|
|
|
|
2013-05-20 11:18:14 +03:00
|
|
|
/*
 * Push the offloads currently recorded in n->curr_guest_offloads to the
 * peer of the first queue, one flag per offload (CSUM, TSO4, TSO6, ECN, UFO).
 */
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    NetClientState *peer = qemu_get_queue(n->nic)->peer;
    const uint64_t active = n->curr_guest_offloads;

    qemu_set_offload(peer,
                     (active & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) != 0,
                     (active & (1ULL << VIRTIO_NET_F_GUEST_TSO4)) != 0,
                     (active & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) != 0,
                     (active & (1ULL << VIRTIO_NET_F_GUEST_ECN)) != 0,
                     (active & (1ULL << VIRTIO_NET_F_GUEST_UFO)) != 0);
}
|
|
|
|
|
|
|
|
static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
|
|
|
|
{
|
|
|
|
static const uint64_t guest_offloads_mask =
|
|
|
|
(1ULL << VIRTIO_NET_F_GUEST_CSUM) |
|
|
|
|
(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
|
|
|
|
(1ULL << VIRTIO_NET_F_GUEST_TSO6) |
|
|
|
|
(1ULL << VIRTIO_NET_F_GUEST_ECN) |
|
|
|
|
(1ULL << VIRTIO_NET_F_GUEST_UFO);
|
|
|
|
|
|
|
|
return guest_offloads_mask & features;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return the guest offload bits present in the device's guest_features.
 */
static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    return virtio_net_guest_offloads_by_features(
               VIRTIO_DEVICE(n)->guest_features);
}
|
|
|
|
|
2020-11-18 09:37:44 +01:00
|
|
|
typedef struct {
|
|
|
|
VirtIONet *n;
|
|
|
|
char *id;
|
|
|
|
} FailoverId;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Set the id of the failover primary device
|
|
|
|
*
|
|
|
|
* @opaque: FailoverId to setup
|
|
|
|
* @opts: opts for device we are handling
|
|
|
|
* @errp: returns an error if this function fails
|
|
|
|
*/
|
|
|
|
static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp)
|
|
|
|
{
|
|
|
|
FailoverId *fid = opaque;
|
|
|
|
const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
|
|
|
|
|
|
|
|
if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) {
|
|
|
|
fid->id = g_strdup(opts->id);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Find the primary device id for this failover virtio-net
|
|
|
|
*
|
|
|
|
* @n: VirtIONet device
|
|
|
|
* @errp: returns an error if this function fails
|
|
|
|
*/
|
|
|
|
static char *failover_find_primary_device_id(VirtIONet *n)
|
|
|
|
{
|
|
|
|
Error *err = NULL;
|
|
|
|
FailoverId fid;
|
|
|
|
|
2020-11-18 09:37:45 +01:00
|
|
|
fid.n = n;
|
2020-11-18 09:37:44 +01:00
|
|
|
if (!qemu_opts_foreach(qemu_find_opts("device"),
|
|
|
|
failover_set_primary, &fid, &err)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return fid.id;
|
|
|
|
}
|
|
|
|
|
2020-11-18 09:37:48 +01:00
|
|
|
/**
|
|
|
|
* Find the primary device for this failover virtio-net
|
|
|
|
*
|
|
|
|
* @n: VirtIONet device
|
|
|
|
* @errp: returns an error if this function fails
|
|
|
|
*/
|
|
|
|
static DeviceState *failover_find_primary_device(VirtIONet *n)
|
|
|
|
{
|
|
|
|
char *id = failover_find_primary_device_id(n);
|
|
|
|
|
|
|
|
if (!id) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return qdev_find_recursive(sysbus_get_default(), id);
|
|
|
|
}
|
|
|
|
|
2019-10-29 12:49:04 +01:00
|
|
|
/*
 * Create the failover primary device from its "device" options unless it
 * already exists.
 *
 * @n: VirtIONet device acting as the standby
 * @errp: set when no primary device id is configured or when
 *        qdev_device_add() fails
 */
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    QemuOpts *opts;
    char *id;
    DeviceState *dev = failover_find_primary_device(n);

    /* Nothing to do when the primary device is already present */
    if (dev) {
        return;
    }

    id = failover_find_primary_device_id(n);
    if (!id) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }
    opts = qemu_opts_find(qemu_find_opts("device"), id);
    /* The id is a g_strdup()ed copy only needed for the lookup above */
    g_free(id);
    g_assert(opts); /* cannot be NULL because id was found using opts list */
    dev = qdev_device_add(opts, &err);
    if (err) {
        qemu_opts_del(opts);
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}
|
|
|
|
|
2015-06-03 14:47:19 +02:00
|
|
|
/*
 * Apply the feature set to the device state.
 *
 * @vdev: the virtio-net device
 * @features: feature bits to apply
 *
 * Updates multiqueue, mergeable rx buffer, RSC, RSS and guest offload state,
 * acks the features on every vhost-backed queue, resets the VLAN filter
 * table and, when VIRTIO_NET_F_STANDBY is set, adds the failover primary.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    bool has_rss;
    bool has_rsc_ext;
    int vlan_fill;
    int q;

    /* MTU is masked out when bypassing a backend that did not offer it */
    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    has_rss = virtio_has_feature(features, VIRTIO_NET_F_RSS);
    has_rsc_ext = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT);

    virtio_net_set_multiqueue(n,
                              has_rss ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    /* RSC needs RSC_EXT plus the GUEST_TSO bit of the respective IP version */
    n->rsc4_enabled = has_rsc_ext &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = has_rsc_ext &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = has_rss;

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (q = 0; q < n->max_queues; q++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, q);

        /* Only queues with a vhost backend need the ack */
        if (get_vhost_net(nc->peer)) {
            vhost_net_ack_features(get_vhost_net(nc->peer), features);
        }
    }

    /* VLAN table: all bits cleared with CTRL_VLAN, all bits set without it */
    vlan_fill = virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN) ? 0
                                                                     : 0xff;
    memset(n->vlans, vlan_fill, MAX_VLAN >> 3);

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        /* A failure to add the primary is reported as a warning only */
        if (err) {
            warn_report_err(err);
        }
    }
}
|
|
|
|
|
2009-02-05 22:36:20 +00:00
|
|
|
/*
 * Handle an rx-mode control command.
 *
 * @n: the virtio-net device
 * @cmd: one of the VIRTIO_NET_CTRL_RX_* commands
 * @iov: command payload, a single on/off byte
 * @iov_cnt: number of elements in @iov
 *
 * Stores the payload byte in the rx-mode field selected by @cmd and calls
 * rxfilter_notify().
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR when the payload is
 * short or @cmd is not recognised.
 */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    uint8_t on;

    if (iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)) != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    switch (cmd) {
    case VIRTIO_NET_CTRL_RX_PROMISC:
        n->promisc = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLMULTI:
        n->allmulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLUNI:
        n->alluni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOMULTI:
        n->nomulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOUNI:
        n->nouni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOBCAST:
        n->nobcast = on;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
|
|
|
|
|
2013-05-20 11:18:14 +03:00
|
|
|
/*
 * Handle a guest-offloads control command.
 *
 * @n: the virtio-net device
 * @cmd: only VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET is accepted
 * @iov: command payload, a 64-bit offloads mask
 * @iov_cnt: number of elements in @iov
 *
 * Returns VIRTIO_NET_OK once the new offloads have been applied, and
 * VIRTIO_NET_ERR when VIRTIO_NET_F_CTRL_GUEST_OFFLOADS is not set on the
 * device, the payload is short, @cmd is unknown, the device has no vnet
 * header, or the mask contains offloads outside the supported set.
 */
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    uint64_t supported_offloads;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    if (iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)) !=
        sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        return VIRTIO_NET_ERR;
    }

    /* Decode the raw 64-bit payload through virtio_ldq_p() */
    offloads = virtio_ldq_p(vdev, &offloads);

    if (!n->has_vnet_hdr) {
        return VIRTIO_NET_ERR;
    }

    /*
     * The RSC state is taken from the RSC_EXT bit of the request; that bit
     * is then removed before the mask is checked against the supported
     * guest offloads.
     */
    n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
    virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

    supported_offloads = virtio_net_supported_guest_offloads(n);
    if (offloads & ~supported_offloads) {
        return VIRTIO_NET_ERR;
    }

    n->curr_guest_offloads = offloads;
    virtio_net_apply_guest_offloads(n);

    return VIRTIO_NET_OK;
}
|
|
|
|
|
2009-02-05 22:36:28 +00:00
|
|
|
/*
 * Handle a VIRTIO_NET_CTRL_MAC class command.
 *
 * VIRTIO_NET_CTRL_MAC_ADDR_SET: the payload must be exactly sizeof(n->mac)
 * bytes; it replaces n->mac.
 *
 * VIRTIO_NET_CTRL_MAC_TABLE_SET: the payload holds two consecutive tables
 * (the first is accounted as unicast, the second as multicast), each a
 * 32-bit entry count followed by that many ETH_ALEN-byte addresses.  A
 * table that does not fit into the room left in the MAC_TABLE_ENTRIES-entry
 * filter is not stored; its overflow flag is set instead.  The new filter
 * is assembled in a scratch buffer and only copied into n->mac_table after
 * the whole request has been validated, so a malformed request leaves the
 * current filter untouched.
 *
 * @n:       device state
 * @cmd:     VIRTIO_NET_CTRL_MAC_* command code
 * @iov:     command payload
 * @iov_cnt: number of elements in @iov
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR for an unknown command
 * or a malformed payload.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    size_t table_len;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* First table: entry count, then the addresses. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    /* Convert only after the count is known to have been read in full. */
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    /*
     * The count comes from the guest: do the multiplication in 64 bits so
     * that a huge value cannot wrap around and slip past the length check.
     * '>' rather than '!=' because the second table still follows.
     */
    if ((uint64_t)mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }
    /* Cannot overflow: bounded by iov_size() above. */
    table_len = (size_t)mac_data.entries * ETH_ALEN;

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs, table_len);
        if (s != table_len) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    /* Skip the first table's addresses whether or not they were stored. */
    iov_discard_front(&iov, &iov_cnt, table_len);

    first_multi = in_use;

    /* Second table: same layout, must consume the rest of the payload. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    if ((uint64_t)mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }
    table_len = (size_t)mac_data.entries * ETH_ALEN;

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN], table_len);
        if (s != table_len) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    /* Request fully validated: commit the new filter in one go. */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
|
|
|
|
|
2009-02-05 22:36:32 +00:00
|
|
|
/*
 * Handle a VLAN filter command: add or remove one VLAN id from the
 * n->vlans bitmap.
 *
 * The payload starts with a 16-bit VLAN id, converted with
 * virtio_lduw_p().  The id selects bit (vid & 0x1f) of word
 * n->vlans[vid >> 5].
 *
 * @n:       device state
 * @cmd:     VIRTIO_NET_CTRL_VLAN_ADD or VIRTIO_NET_CTRL_VLAN_DEL
 * @iov:     command payload
 * @iov_cnt: number of elements in @iov
 *
 * Returns VIRTIO_NET_OK on success; VIRTIO_NET_ERR if the payload is too
 * short, the id is >= MAX_VLAN, or @cmd is not recognised.
 */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }
    /* Convert only once the id is known to have been read in full. */
    vid = virtio_lduw_p(vdev, &vid);

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
|
|
|
|
|
2014-05-20 14:01:44 +08:00
|
|
|
/*
 * Handle an announce command from the guest.
 *
 * Only VIRTIO_NET_CTRL_ANNOUNCE_ACK is accepted, and only while
 * VIRTIO_NET_S_ANNOUNCE is set in n->status.  The ACK clears that status
 * bit and, if n->announce_timer.round is non-zero, steps the announce
 * timer.  @iov and @iov_cnt are not used.
 *
 * Returns VIRTIO_NET_OK for an accepted ACK, VIRTIO_NET_ERR otherwise.
 */
static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);

    /* Reject anything that is not an ACK of a pending announcement. */
    if (cmd != VIRTIO_NET_CTRL_ANNOUNCE_ACK ||
        !(n->status & VIRTIO_NET_S_ANNOUNCE)) {
        return VIRTIO_NET_ERR;
    }

    n->status &= ~VIRTIO_NET_S_ANNOUNCE;
    if (n->announce_timer.round) {
        qemu_announce_timer_step(&n->announce_timer);
    }

    return VIRTIO_NET_OK;
}
|
|
|
|
|
2021-05-14 14:48:33 +03:00
|
|
|
static void virtio_net_detach_epbf_rss(VirtIONet *n);

/*
 * Switch RSS off for the device.
 *
 * Emits the rss_disable trace event only if RSS was enabled, then clears
 * rss_data.enabled and detaches the steering eBPF program from the backend.
 */
static void virtio_net_disable_rss(VirtIONet *n)
{
    const bool was_enabled = n->rss_data.enabled;

    if (was_enabled) {
        trace_virtio_net_rss_disable();
    }

    n->rss_data.enabled = false;
    virtio_net_detach_epbf_rss(n);
}
|
|
|
|
|
|
|
|
/*
 * Pass a steering eBPF program fd to the peer returned by
 * qemu_get_peer(qemu_get_queue(nic), 0).
 *
 * Returns false when there is no such peer or its NetClientInfo has no
 * set_steering_ebpf callback; otherwise returns the callback's result.
 * Callers in this file pass prog_fd == -1 to detach.
 */
static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *peer = qemu_get_peer(qemu_get_queue(nic), 0);

    if (!peer || !peer->info->set_steering_ebpf) {
        return false;
    }

    return peer->info->set_steering_ebpf(peer, prog_fd);
}
|
|
|
|
|
|
|
|
static void rss_data_to_rss_config(struct VirtioNetRssData *data,
|
|
|
|
struct EBPFRSSConfig *config)
|
|
|
|
{
|
|
|
|
config->redirect = data->redirect;
|
|
|
|
config->populate_hash = data->populate_hash;
|
|
|
|
config->hash_types = data->hash_types;
|
|
|
|
config->indirections_len = data->indirections_len;
|
|
|
|
config->default_queue = data->default_queue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Program the loaded eBPF RSS helper with the current RSS state and attach
 * it to the backend.
 *
 * Returns false if the eBPF program is not loaded, if ebpf_rss_set_all()
 * fails, or if the backend refuses the program fd; true otherwise.
 */
static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    /* The backend is only touched once the eBPF maps were updated. */
    return ebpf_rss_set_all(&n->ebpf_rss, &config,
                            n->rss_data.indirections_table,
                            n->rss_data.key) &&
           virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd);
}
|
|
|
|
|
|
|
|
/* Detach the steering eBPF program by handing the backend an fd of -1. */
static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    /* The result is deliberately not checked. */
    (void)virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
|
|
|
|
|
|
|
|
/*
 * Load the eBPF RSS program, but only if the backend accepts a steering
 * eBPF fd (probed by passing -1).
 *
 * Returns false when the backend doesn't support steering eBPF, otherwise
 * the result of ebpf_rss_load().
 */
static bool virtio_net_load_ebpf(VirtIONet *n)
{
    bool backend_supported = virtio_net_attach_ebpf_to_backend(n->nic, -1);

    return backend_supported && ebpf_rss_load(&n->ebpf_rss);
}
|
|
|
|
|
|
|
|
/* Detach the steering eBPF program from the backend, then unload it. */
static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_detach_epbf_rss(n);
    ebpf_rss_unload(&n->ebpf_rss);
}
|
|
|
|
|
|
|
|
/*
 * Parse an RSS (do_rss == true) or hash-report (do_rss == false)
 * configuration command and apply it to n->rss_data.
 *
 * The command buffer is consumed in four steps: the fixed part of
 * struct virtio_net_rss_config, the indirection table, a packed
 * { max queues, key length } pair, and finally the hash key itself.
 *
 * @n:        device state
 * @iov:      command payload (control header already stripped by the caller)
 * @iov_cnt:  number of entries in @iov
 * @do_rss:   true for RSS config, false for hash-report config; in the
 *            latter case the indirection table length is forced to 1, the
 *            default queue to 0 and the queue count to n->curr_queues
 *
 * Returns the number of queues to use on success, or 0 on any error.  On
 * error RSS is disabled and the reason is reported through the
 * virtio_net_rss_error trace event.
 */
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queues, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }

    /* Fixed-size head of the config, up to the indirection table. */
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    /* The guest supplies a mask; the table length is mask + 1. */
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queues) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }

    /* Indirection table: replace the previous one wholesale. */
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    /* Convert each entry from guest byte order in place. */
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }

    /* Queue count (temp.us) and key length (temp.b). */
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queues";
        err_value = (uint32_t)s;
        goto error;
    }
    queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
    if (queues == 0 || queues > n->max_queues) {
        err_msg = "Invalid number of queues";
        err_value = queues;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    /* No key and no hash types: a valid request that leaves RSS disabled. */
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queues;
    }

    /* Hash key. */
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* EBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                /* Give the trace event a reason instead of "". */
                err_msg = "Can't load eBPF RSS for vhost";
                goto error;
            }
            /* fallback to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* use software RSS for hash populating */
        /* and detach eBPF if was loaded before */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queues;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}
|
|
|
|
|
2013-01-30 19:12:39 +08:00
|
|
|
/*
 * Handle a VIRTIO_NET_CTRL_MQ class command.
 *
 * RSS is disabled first, whatever the command.  Then:
 *  - HASH_CONFIG: parsed by virtio_net_handle_rss(); the result is returned
 *    directly and the queue count is left untouched.
 *  - RSS_CONFIG: parsed by virtio_net_handle_rss(), which yields the
 *    queue count.
 *  - VQ_PAIRS_SET: requires VIRTIO_NET_F_MQ; the queue count is read from
 *    the command payload.
 * Any other command is rejected.
 *
 * A validated queue count is stored in n->curr_queues and applied through
 * virtio_net_set_status() and virtio_net_set_queues().
 *
 * Returns VIRTIO_NET_OK or VIRTIO_NET_ERR.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    virtio_net_disable_rss(n);

    switch (cmd) {
    case VIRTIO_NET_CTRL_MQ_HASH_CONFIG:
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;

    case VIRTIO_NET_CTRL_MQ_RSS_CONFIG:
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
        break;

    case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: {
        struct virtio_net_ctrl_mq mq;
        size_t copied;

        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }

        copied = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (copied != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }

        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
        break;
    }

    default:
        return VIRTIO_NET_ERR;
    }

    if (!n->multiqueue ||
        queues > n->max_queues ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
|
2016-09-30 17:13:16 +02:00
|
|
|
|
2009-02-05 22:36:16 +00:00
|
|
|
/*
 * Control virtqueue handler: pop every pending control command, dispatch
 * it by class and write a one-byte ack back to the guest.
 *
 * For each element the out buffers must hold at least a
 * struct virtio_net_ctrl_hdr and the in buffers at least the ack byte;
 * otherwise the device is put into the error state via virtio_error() and
 * processing stops.
 *
 * A command whose class is not handled below is answered with
 * VIRTIO_NET_ERR.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        /*
         * Start every command from ERR.  Without this an unknown class
         * would inherit the status of the previously processed command
         * and could be acknowledged as OK.
         */
        status = VIRTIO_NET_ERR;

        /*
         * Work on a copy of the out iovec: iov_discard_front() modifies
         * the array it is given.  iov2 keeps the pointer to free.
         */
        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}
|
|
|
|
|
2008-12-17 19:13:11 +00:00
|
|
|
/* RX */
|
|
|
|
|
|
|
|
/*
 * RX virtqueue handler: flush the packets queued for the net client
 * subqueue that corresponds to this virtqueue.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));
    NetClientState *subqueue = qemu_get_subqueue(n->nic, queue_index);

    qemu_flush_queued_packets(subqueue);
}
|
|
|
|
|
2020-03-05 18:56:49 +01:00
|
|
|
/*
 * Tell whether the given net client queue may deliver a packet now.
 *
 * All of the following must hold, checked in this order: the VM is
 * running, the queue index is below the number of queues in use, the RX
 * virtqueue is ready, and the driver has set DRIVER_OK.
 */
static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    return vdev->vm_running &&
           nc->queue_index < n->curr_queues &&
           virtio_queue_ready(q->rx_vq) &&
           (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) != 0;
}
|
|
|
|
|
2013-01-30 19:12:38 +08:00
|
|
|
/*
 * True when the RX virtqueue is empty or, with mergeable RX buffers in
 * use, when virtqueue_avail_bytes() reports that bufsize bytes are not
 * available.
 */
static bool virtio_net_rx_lacks_buffers(VirtIONetQueue *q, int bufsize)
{
    return virtio_queue_empty(q->rx_vq) ||
           (q->n->mergeable_rx_bufs &&
            !virtqueue_avail_bytes(q->rx_vq, bufsize, 0));
}

/*
 * Check whether the guest has posted enough RX buffers for bufsize bytes.
 *
 * Returns 1 with guest notifications disabled when buffers are available.
 * Returns 0 with guest notifications enabled when they are not.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    if (virtio_net_rx_lacks_buffers(q, bufsize)) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /*
         * The guest may have added buffers between the check above and
         * the moment notifications were switched on, so look once more
         * before giving up.
         */
        if (virtio_net_rx_lacks_buffers(q, bufsize)) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
|
|
|
|
|
2014-06-24 19:42:54 +02:00
|
|
|
/*
 * Apply virtio_tswap16s() in place to each 16-bit field of a virtio-net
 * header: hdr_len, gso_size, csum_start and csum_offset.
 */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->csum_offset);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->hdr_len);
}
|
|
|
|
|
2009-10-22 17:43:48 +01:00
|
|
|
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
|
|
|
|
* it never finds out that the packets don't have valid checksums. This
|
|
|
|
* causes dhclient to get upset. Fedora's carried a patch for ages to
|
|
|
|
* fix this with Xen but it hasn't appeared in an upstream release of
|
|
|
|
* dhclient yet.
|
|
|
|
*
|
|
|
|
* To avoid breaking existing guests, we catch udp packets and add
|
|
|
|
* checksums. This is terrible but it's better than hacking the guest
|
|
|
|
* kernels.
|
|
|
|
*
|
|
|
|
* N.B. if we introduce a zero-copy API, this operation is no longer free so
|
|
|
|
* we should provide a mechanism to disable it to avoid polluting the host
|
|
|
|
* cache.
|
|
|
|
*/
|
|
|
|
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
|
2012-09-24 13:14:16 +02:00
|
|
|
uint8_t *buf, size_t size)
|
2009-10-22 17:43:48 +01:00
|
|
|
{
|
|
|
|
if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
|
|
|
|
(size > 27 && size < 1500) && /* normal sized MTU */
|
|
|
|
(buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
|
|
|
|
(buf[23] == 17) && /* ip.protocol == UDP */
|
|
|
|
(buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
|
2020-12-11 17:35:12 +08:00
|
|
|
net_checksum_calculate(buf, size, CSUM_UDP);
|
2009-10-22 17:43:48 +01:00
|
|
|
hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-09-24 13:24:17 +02:00
|
|
|
/*
 * Write the virtio-net header for an incoming packet to the start of the
 * guest buffers described by iov.
 *
 * If the backend supplies a vnet header (n->has_vnet_hdr), the header at
 * the start of buf is fixed up in place (dhclient checksum workaround,
 * optional field swap) and then copied out.  Otherwise a header with no
 * flags and GSO_NONE is synthesized.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    struct virtio_net_hdr blank_hdr = {
        .flags = 0,
        .gso_type = VIRTIO_NET_HDR_GSO_NONE
    };
    /* FIXME this cast is evil */
    void *wbuf = (void *)buf;

    if (!n->has_vnet_hdr) {
        iov_from_buf(iov, iov_cnt, 0, &blank_hdr, sizeof(blank_hdr));
        return;
    }

    work_around_broken_dhclient(wbuf, (uint8_t *)wbuf + n->host_hdr_len,
                                size - n->host_hdr_len);

    if (n->needs_vnet_hdr_swap) {
        virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
    }

    iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
}
|
|
|
|
|
2009-02-05 22:36:24 +00:00
|
|
|
/*
 * Decide whether an incoming frame passes the device's RX filters.
 *
 * buf points at the host header; the Ethernet frame starts
 * n->host_hdr_len bytes in.  Checks are applied in this order:
 * promiscuous mode, VLAN table (for 0x8100-tagged frames), then the
 * broadcast / multicast / unicast rules and the MAC table.
 *
 * Returns 1 to accept the frame, 0 to drop it.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    const uint8_t *frame;
    int i;

    if (n->promisc) {
        return 1;
    }

    frame = buf + n->host_hdr_len;

    /* VLAN-tagged frame: the VID must be set in the vlans bitmap. */
    if (memcmp(&frame[12], vlan, sizeof(vlan)) == 0) {
        int vid = lduw_be_p(frame + 14) & 0xfff;

        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (frame[0] & 1) {
        /* multicast (group bit set), which includes broadcast */
        if (memcmp(frame, bcast, sizeof(bcast)) == 0) {
            return !n->nobcast;
        }
        if (n->nomulti) {
            return 0;
        }
        if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        /* multicast entries occupy [first_multi, in_use) */
        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (memcmp(frame, &n->mac_table.macs[i * ETH_ALEN],
                       ETH_ALEN) == 0) {
                return 1;
            }
        }
        return 0;
    }

    /* unicast */
    if (n->nouni) {
        return 0;
    }
    if (n->alluni || n->mac_table.uni_overflow) {
        return 1;
    }
    if (memcmp(frame, n->mac, ETH_ALEN) == 0) {
        return 1;
    }

    /* unicast entries occupy [0, first_multi) */
    for (i = 0; i < n->mac_table.first_multi; i++) {
        if (memcmp(frame, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN) == 0) {
            return 1;
        }
    }

    return 0;
}
|
|
|
|
|
2020-05-08 15:59:29 +03:00
|
|
|
static uint8_t virtio_net_get_hash_type(bool isip4,
|
|
|
|
bool isip6,
|
|
|
|
bool isudp,
|
|
|
|
bool istcp,
|
|
|
|
uint32_t types)
|
|
|
|
{
|
|
|
|
if (isip4) {
|
|
|
|
if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
|
|
|
|
return NetPktRssIpV4Tcp;
|
|
|
|
}
|
|
|
|
if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
|
|
|
|
return NetPktRssIpV4Udp;
|
|
|
|
}
|
|
|
|
if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
|
|
|
|
return NetPktRssIpV4;
|
|
|
|
}
|
|
|
|
} else if (isip6) {
|
|
|
|
uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
|
|
|
|
VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
|
|
|
|
|
|
|
|
if (istcp && (types & mask)) {
|
|
|
|
return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
|
|
|
|
NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
|
|
|
|
}
|
|
|
|
mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
|
|
|
|
if (isudp && (types & mask)) {
|
|
|
|
return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
|
|
|
|
NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
|
|
|
|
}
|
|
|
|
mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
|
|
|
|
if (types & mask) {
|
|
|
|
return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
|
|
|
|
NetPktRssIpV6Ex : NetPktRssIpV6;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0xff;
|
|
|
|
}
|
|
|
|
|
2020-05-08 15:59:31 +03:00
|
|
|
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
|
|
|
|
uint32_t hash)
|
|
|
|
{
|
|
|
|
struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
|
|
|
|
hdr->hash_value = hash;
|
|
|
|
hdr->hash_report = report;
|
|
|
|
}
|
|
|
|
|
2020-05-08 15:59:29 +03:00
|
|
|
/*
 * Software RSS: classify the packet, compute its RSS hash and work out
 * which queue it should be delivered on.
 *
 * When rss_data.populate_hash is set, the hash value and report type are
 * also written into the packet's header at the start of buf.
 *
 * Returns the index of the queue the packet must be redirected to, or -1
 * when it should stay on the queue it arrived on (nc->queue_index).
 */
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    /* Hash report values, indexed by NetPktRss* hash type. */
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };
    VirtIONet *n = qemu_get_nic_opaque(nc);
    struct NetRxPkt *pkt = n->rx_pkt;
    unsigned int cur_queue = nc->queue_index;
    unsigned int target_queue = cur_queue;
    bool isip4, isip6, isudp, istcp;
    uint8_t net_hash_type;
    uint32_t hash;

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);

    /* IP fragments are treated as plain IP: no TCP or UDP hash type. */
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = false;
        isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = false;
        isudp = false;
    }

    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);

    /* No applicable hash type: report "none", use the default queue. */
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        if (n->rss_data.redirect) {
            return n->rss_data.default_queue;
        }
        return -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        /* indirections_len was checked to be a power of two when set */
        unsigned int slot = hash & (n->rss_data.indirections_len - 1);

        target_queue = n->rss_data.indirections_table[slot];
    }

    if (target_queue == cur_queue) {
        return -1;
    }
    return target_queue;
}
|
|
|
|
|
2017-01-27 16:40:20 +01:00
|
|
|
/*
 * Deliver one packet to the guest through the RX virtqueue of nc.
 *
 * @nc:     net client queue the packet arrived on
 * @buf:    packet, starting with the host header (n->host_hdr_len bytes)
 * @size:   total length of @buf
 * @no_rss: true when the packet was already steered by software RSS, so
 *          steering must not be applied a second time
 *
 * With software RSS active the packet may first be redirected to another
 * queue, in which case this function re-enters itself once for that queue.
 *
 * Returns:
 *   -1    the queue cannot receive, or the virtqueue ran out of or held
 *         invalid elements
 *    0    the guest has not posted enough buffers
 *   size  the packet was delivered, filtered out, or dropped because it
 *         did not fit into a single non-mergeable buffer
 */
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            /*
             * The index comes from the guest-written indirection table
             * (or the default queue) and is only validated against
             * max_queues, so fold it into the queues currently in use.
             * curr_queues is non-zero here: virtio_net_can_receive()
             * above required nc->queue_index < n->curr_queues.
             */
            NetClientState *nc2 =
                qemu_get_subqueue(n->nic, index % n->curr_queues);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            /* Running dry part-way through a packet is a device error. */
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /*
                 * Remember where num_buffers lives in the first element;
                 * it is filled in once the element count is known.
                 */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                /* Copy the part of the host header that follows mhdr. */
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}
|
|
|
|
|
virtio-net: support RSC v4/v6 tcp traffic for Windows HCK
This commit adds implementation of RX packets
coalescing, compatible with requirements of Windows
Hardware compatibility kit.
The device enables feature VIRTIO_NET_F_RSC_EXT in
host features if it supports extended RSC functionality
as defined in the specification.
This feature requires at least one of VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6. Windows guest driver acks
this feature only if VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
is also present.
If the guest driver acks VIRTIO_NET_F_RSC_EXT feature,
the device coalesces TCPv4 and TCPv6 packets (if
respective VIRTIO_NET_F_GUEST_TSO feature is on,
populates extended RSC information in virtio header
and sets VIRTIO_NET_HDR_F_RSC_INFO bit in header flags.
The device does not recalculate checksums in the coalesced
packet, so they are not valid.
In this case:
All the data packets in a tcp connection are cached
to a single buffer in every receive interval, and will
be sent out via a timer, the 'virtio_net_rsc_timeout'
controls the interval, this value may impact the
performance and response time of tcp connection,
50000(50us) is an experience value to gain a performance
improvement, since the whql test sends packets every 100us,
so '300000(300us)' passes the test case, it is the default
value as well, tune it via the command line parameter
'rsc_interval' within 'virtio-net-pci' device, for example,
to launch a guest with interval set as '500000':
'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,
guest_rsc_ext=on,rsc_interval=500000'
The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.
'NetRscChain' is used to save the segments of IPv4/6 in a
VirtIONet device.
A new segment becomes a 'Candidate' as long as it passes the sanity check,
the main handler of TCP includes TCP window update, duplicated
ACK check and the real data coalescing.
An 'Candidate' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.
Sanity check includes:
1. Incorrect version in IP header
2. An IP options or IP fragment
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.
5. An ECN packet
There may be more cases that should be considered, such as the
IP identification field and other flags, but checking them breaks the test
because Windows sets the identification to the same value even when the
packet is not a fragment.
Normally it includes 2 typical ways to handle a TCP control flag,
'bypass' and 'finalize', 'bypass' means should be sent out directly,
while 'finalize' means the packets should also be bypassed, but this
should be done after search for the same connection packets in the
pool and drain all of them out, this is to avoid out of order fragment.
All the 'SYN' packets will be bypassed since this always begin a new'
connection, other flags such 'URG/FIN/RST/CWR/ECE' will trigger a
finalization, because this normally happens upon a connection is going
to be closed, an 'URG' packet also finalize current coalescing unit.
Statistics can be used to monitor the basic coalescing status, the
'out of order' and 'out of window' means how many retransmitting packets,
thus describe the performance intuitively.
Difference between ip v4 and v6 processing:
Fragment length in ipv4 header includes itself, while it's not
included for ipv6, thus means ipv6 can carry a real 65535 payload.
Note that main goal of implementing this feature in software
is to create reference setup for certification tests. In such
setups guest migration is not required, so the coalesced packets
not yet delivered to the guest will be lost in case of migration.
Signed-off-by: Wei Xu <wexu@redhat.com>
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-01-03 15:12:29 +02:00
|
|
|
/*
 * Receive helper used by the RSC code paths in this file.
 *
 * Forwards the frame (@buf, @size) for client @nc to
 * virtio_net_receive_rcu() with its last argument set to false, and
 * returns that function's result unchanged.
 */
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    ssize_t received;

    /*
     * NOTE(review): RCU_READ_LOCK_GUARD() is expected to keep the RCU read
     * lock held until this scope is left - confirm against its definition.
     */
    RCU_READ_LOCK_GUARD();

    received = virtio_net_receive_rcu(nc, buf, size, false);
    return received;
}
|
|
|
|
|
virtio-net: support RSC v4/v6 tcp traffic for Windows HCK
This commit adds implementation of RX packets
coalescing, compatible with requirements of Windows
Hardware compatibility kit.
The device enables feature VIRTIO_NET_F_RSC_EXT in
host features if it supports extended RSC functionality
as defined in the specification.
This feature requires at least one of VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6. Windows guest driver acks
this feature only if VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
is also present.
If the guest driver acks VIRTIO_NET_F_RSC_EXT feature,
the device coalesces TCPv4 and TCPv6 packets (if
respective VIRTIO_NET_F_GUEST_TSO feature is on,
populates extended RSC information in virtio header
and sets VIRTIO_NET_HDR_F_RSC_INFO bit in header flags.
The device does not recalculate checksums in the coalesced
packet, so they are not valid.
In this case:
All the data packets in a tcp connection are cached
to a single buffer in every receive interval, and will
be sent out via a timer, the 'virtio_net_rsc_timeout'
controls the interval, this value may impact the
performance and response time of tcp connection,
50000(50us) is an experience value to gain a performance
improvement, since the whql test sends packets every 100us,
so '300000(300us)' passes the test case, it is the default
value as well, tune it via the command line parameter
'rsc_interval' within 'virtio-net-pci' device, for example,
to launch a guest with interval set as '500000':
'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,
guest_rsc_ext=on,rsc_interval=500000'
The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.
'NetRscChain' is used to save the segments of IPv4/6 in a
VirtIONet device.
A new segment becomes a 'Candidate' as well as it passed sanity check,
the main handler of TCP includes TCP window update, duplicated
ACK check and the real data coalescing.
An 'Candidate' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.
Sanity check includes:
1. Incorrect version in IP header
2. An IP options or IP fragment
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.
5. An ECN packet
Even though, there might more cases should be considered such as
ip identification other flags, while it breaks the test because
windows set it to the same even it's not a fragment.
Normally it includes 2 typical ways to handle a TCP control flag,
'bypass' and 'finalize', 'bypass' means should be sent out directly,
while 'finalize' means the packets should also be bypassed, but this
should be done after search for the same connection packets in the
pool and drain all of them out, this is to avoid out of order fragment.
All the 'SYN' packets will be bypassed since this always begin a new'
connection, other flags such 'URG/FIN/RST/CWR/ECE' will trigger a
finalization, because this normally happens upon a connection is going
to be closed, an 'URG' packet also finalize current coalescing unit.
Statistics can be used to monitor the basic coalescing status, the
'out of order' and 'out of window' means how many retransmitting packets,
thus describe the performance intuitively.
Difference between ip v4 and v6 processing:
Fragment length in ipv4 header includes itself, while it's not
included for ipv6, thus means ipv6 can carry a real 65535 payload.
Note that main goal of implementing this feature in software
is to create reference setup for certification tests. In such
setups guest migration is not required, so the coalesced packets
not yet delivered to the guest will be lost in case of migration.
Signed-off-by: Wei Xu <wexu@redhat.com>
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-01-03 15:12:29 +02:00
|
|
|
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
|
|
|
|
const uint8_t *buf,
|
|
|
|
VirtioNetRscUnit *unit)
|
|
|
|
{
|
|
|
|
uint16_t ip_hdrlen;
|
|
|
|
struct ip_header *ip;
|
|
|
|
|
|
|
|
ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
|
|
|
|
+ sizeof(struct eth_header));
|
|
|
|
unit->ip = (void *)ip;
|
|
|
|
ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
|
|
|
|
unit->ip_plen = &ip->ip_len;
|
|
|
|
unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
|
|
|
|
unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
|
|
|
|
unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
|
|
|
|
const uint8_t *buf,
|
|
|
|
VirtioNetRscUnit *unit)
|
|
|
|
{
|
|
|
|
struct ip6_header *ip6;
|
|
|
|
|
|
|
|
ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
|
|
|
|
+ sizeof(struct eth_header));
|
|
|
|
unit->ip = ip6;
|
|
|
|
unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
|
2020-04-13 00:35:56 +02:00
|
|
|
unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
|
virtio-net: support RSC v4/v6 tcp traffic for Windows HCK
This commit adds implementation of RX packets
coalescing, compatible with requirements of Windows
Hardware compatibility kit.
The device enables feature VIRTIO_NET_F_RSC_EXT in
host features if it supports extended RSC functionality
as defined in the specification.
This feature requires at least one of VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6. Windows guest driver acks
this feature only if VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
is also present.
If the guest driver acks VIRTIO_NET_F_RSC_EXT feature,
the device coalesces TCPv4 and TCPv6 packets (if
respective VIRTIO_NET_F_GUEST_TSO feature is on,
populates extended RSC information in virtio header
and sets VIRTIO_NET_HDR_F_RSC_INFO bit in header flags.
The device does not recalculate checksums in the coalesced
packet, so they are not valid.
In this case:
All the data packets in a tcp connection are cached
to a single buffer in every receive interval, and will
be sent out via a timer, the 'virtio_net_rsc_timeout'
controls the interval, this value may impact the
performance and response time of tcp connection,
50000(50us) is an experience value to gain a performance
improvement, since the whql test sends packets every 100us,
so '300000(300us)' passes the test case, it is the default
value as well, tune it via the command line parameter
'rsc_interval' within 'virtio-net-pci' device, for example,
to launch a guest with interval set as '500000':
'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,
guest_rsc_ext=on,rsc_interval=500000'
The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.
'NetRscChain' is used to save the segments of IPv4/6 in a
VirtIONet device.
A new segment becomes a 'Candidate' as well as it passed sanity check,
the main handler of TCP includes TCP window update, duplicated
ACK check and the real data coalescing.
An 'Candidate' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.
Sanity check includes:
1. Incorrect version in IP header
2. An IP options or IP fragment
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.
5. An ECN packet
Even though, there might more cases should be considered such as
ip identification other flags, while it breaks the test because
windows set it to the same even it's not a fragment.
Normally it includes 2 typical ways to handle a TCP control flag,
'bypass' and 'finalize', 'bypass' means should be sent out directly,
while 'finalize' means the packets should also be bypassed, but this
should be done after search for the same connection packets in the
pool and drain all of them out, this is to avoid out of order fragment.
All the 'SYN' packets will be bypassed since this always begin a new'
connection, other flags such 'URG/FIN/RST/CWR/ECE' will trigger a
finalization, because this normally happens upon a connection is going
to be closed, an 'URG' packet also finalize current coalescing unit.
Statistics can be used to monitor the basic coalescing status, the
'out of order' and 'out of window' means how many retransmitting packets,
thus describe the performance intuitively.
Difference between ip v4 and v6 processing:
Fragment length in ipv4 header includes itself, while it's not
included for ipv6, thus means ipv6 can carry a real 65535 payload.
Note that main goal of implementing this feature in software
is to create reference setup for certification tests. In such
setups guest migration is not required, so the coalesced packets
not yet delivered to the guest will be lost in case of migration.
Signed-off-by: Wei Xu <wexu@redhat.com>
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-01-03 15:12:29 +02:00
|
|
|
+ sizeof(struct ip6_header));
|
|
|
|
unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
|
|
|
|
|
|
|
|
/* There is a difference between payload lenght in ipv4 and v6,
|
|
|
|
ip header is excluded in ipv6 */
|
|
|
|
unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
|
|
|
|
}
|
|
|
|
|
|
|
|
static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
|
|
|
|
VirtioNetRscSeg *seg)
|
|
|
|
{
|
|
|
|
int ret;
|
2020-05-08 15:59:34 +03:00
|
|
|
struct virtio_net_hdr_v1 *h;
|
virtio-net: support RSC v4/v6 tcp traffic for Windows HCK
This commit adds implementation of RX packets
coalescing, compatible with requirements of Windows
Hardware compatibility kit.
The device enables feature VIRTIO_NET_F_RSC_EXT in
host features if it supports extended RSC functionality
as defined in the specification.
This feature requires at least one of VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6. Windows guest driver acks
this feature only if VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
is also present.
If the guest driver acks VIRTIO_NET_F_RSC_EXT feature,
the device coalesces TCPv4 and TCPv6 packets (if
respective VIRTIO_NET_F_GUEST_TSO feature is on,
populates extended RSC information in virtio header
and sets VIRTIO_NET_HDR_F_RSC_INFO bit in header flags.
The device does not recalculate checksums in the coalesced
packet, so they are not valid.
In this case:
All the data packets in a tcp connection are cached
to a single buffer in every receive interval, and will
be sent out via a timer, the 'virtio_net_rsc_timeout'
controls the interval, this value may impact the
performance and response time of tcp connection,
50000(50us) is an experience value to gain a performance
improvement, since the whql test sends packets every 100us,
so '300000(300us)' passes the test case, it is the default
value as well, tune it via the command line parameter
'rsc_interval' within 'virtio-net-pci' device, for example,
to launch a guest with interval set as '500000':
'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,
guest_rsc_ext=on,rsc_interval=500000'
The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.
'NetRscChain' is used to save the segments of IPv4/6 in a
VirtIONet device.
A new segment becomes a 'Candidate' as well as it passed sanity check,
the main handler of TCP includes TCP window update, duplicated
ACK check and the real data coalescing.
An 'Candidate' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.
Sanity check includes:
1. Incorrect version in IP header
2. An IP options or IP fragment
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.
5. An ECN packet
Even though, there might more cases should be considered such as
ip identification other flags, while it breaks the test because
windows set it to the same even it's not a fragment.
Normally it includes 2 typical ways to handle a TCP control flag,
'bypass' and 'finalize', 'bypass' means should be sent out directly,
while 'finalize' means the packets should also be bypassed, but this
should be done after search for the same connection packets in the
pool and drain all of them out, this is to avoid out of order fragment.
All the 'SYN' packets will be bypassed since this always begin a new'
connection, other flags such 'URG/FIN/RST/CWR/ECE' will trigger a
finalization, because this normally happens upon a connection is going
to be closed, an 'URG' packet also finalize current coalescing unit.
Statistics can be used to monitor the basic coalescing status, the
'out of order' and 'out of window' means how many retransmitting packets,
thus describe the performance intuitively.
Difference between ip v4 and v6 processing:
Fragment length in ipv4 header includes itself, while it's not
included for ipv6, thus means ipv6 can carry a real 65535 payload.
Note that main goal of implementing this feature in software
is to create reference setup for certification tests. In such
setups guest migration is not required, so the coalesced packets
not yet delivered to the guest will be lost in case of migration.
Signed-off-by: Wei Xu <wexu@redhat.com>
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-01-03 15:12:29 +02:00
|
|
|
|
2020-05-08 15:59:34 +03:00
|
|
|
h = (struct virtio_net_hdr_v1 *)seg->buf;
|
virtio-net: support RSC v4/v6 tcp traffic for Windows HCK
This commit adds implementation of RX packets
coalescing, compatible with requirements of Windows
Hardware compatibility kit.
The device enables feature VIRTIO_NET_F_RSC_EXT in
host features if it supports extended RSC functionality
as defined in the specification.
This feature requires at least one of VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6. Windows guest driver acks
this feature only if VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
is also present.
If the guest driver acks VIRTIO_NET_F_RSC_EXT feature,
the device coalesces TCPv4 and TCPv6 packets (if
respective VIRTIO_NET_F_GUEST_TSO feature is on,
populates extended RSC information in virtio header
and sets VIRTIO_NET_HDR_F_RSC_INFO bit in header flags.
The device does not recalculate checksums in the coalesced
packet, so they are not valid.
In this case:
All the data packets in a tcp connection are cached
to a single buffer in every receive interval, and will
be sent out via a timer, the 'virtio_net_rsc_timeout'
controls the interval, this value may impact the
performance and response time of tcp connection,
50000(50us) is an experience value to gain a performance
improvement, since the whql test sends packets every 100us,
so '300000(300us)' passes the test case, it is the default
value as well, tune it via the command line parameter
'rsc_interval' within 'virtio-net-pci' device, for example,
to launch a guest with interval set as '500000':
'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,
guest_rsc_ext=on,rsc_interval=500000'
The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.
'NetRscChain' is used to save the segments of IPv4/6 in a
VirtIONet device.
A new segment becomes a 'Candidate' as well as it passed sanity check,
the main handler of TCP includes TCP window update, duplicated
ACK check and the real data coalescing.
An 'Candidate' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.
Sanity check includes:
1. Incorrect version in IP header
2. An IP options or IP fragment
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.
5. An ECN packet
Even though, there might more cases should be considered such as
ip identification other flags, while it breaks the test because
windows set it to the same even it's not a fragment.
Normally it includes 2 typical ways to handle a TCP control flag,
'bypass' and 'finalize', 'bypass' means should be sent out directly,
while 'finalize' means the packets should also be bypassed, but this
should be done after search for the same connection packets in the
pool and drain all of them out, this is to avoid out of order fragment.
All the 'SYN' packets will be bypassed since this always begin a new'
connection, other flags such 'URG/FIN/RST/CWR/ECE' will trigger a
finalization, because this normally happens upon a connection is going
to be closed, an 'URG' packet also finalize current coalescing unit.
Statistics can be used to monitor the basic coalescing status, the
'out of order' and 'out of window' means how many retransmitting packets,
thus describe the performance intuitively.
Difference between ip v4 and v6 processing:
Fragment length in ipv4 header includes itself, while it's not
included for ipv6, thus means ipv6 can carry a real 65535 payload.
Note that main goal of implementing this feature in software
is to create reference setup for certification tests. In such
setups guest migration is not required, so the coalesced packets
not yet delivered to the guest will be lost in case of migration.
Signed-off-by: Wei Xu <wexu@redhat.com>
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-01-03 15:12:29 +02:00
|
|
|
h->flags = 0;
|
|
|
|
h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
|
|
|
|
|
|
|
|
if (seg->is_coalesced) {
|
2020-05-08 15:59:34 +03:00
|
|
|
h->rsc.segments = seg->packets;
|
|
|
|
h->rsc.dup_acks = seg->dup_ack;
|
virtio-net: support RSC v4/v6 tcp traffic for Windows HCK
This commit adds implementation of RX packets
coalescing, compatible with requirements of Windows
Hardware compatibility kit.
The device enables feature VIRTIO_NET_F_RSC_EXT in
host features if it supports extended RSC functionality
as defined in the specification.
This feature requires at least one of VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6. Windows guest driver acks
this feature only if VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
is also present.
If the guest driver acks VIRTIO_NET_F_RSC_EXT feature,
the device coalesces TCPv4 and TCPv6 packets (if
respective VIRTIO_NET_F_GUEST_TSO feature is on,
populates extended RSC information in virtio header
and sets VIRTIO_NET_HDR_F_RSC_INFO bit in header flags.
The device does not recalculate checksums in the coalesced
packet, so they are not valid.
In this case:
All the data packets in a tcp connection are cached
to a single buffer in every receive interval, and will
be sent out via a timer, the 'virtio_net_rsc_timeout'
controls the interval, this value may impact the
performance and response time of tcp connection,
50000(50us) is an experience value to gain a performance
improvement, since the whql test sends packets every 100us,
so '300000(300us)' passes the test case, it is the default
value as well, tune it via the command line parameter
'rsc_interval' within 'virtio-net-pci' device, for example,
to launch a guest with interval set as '500000':
'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,
guest_rsc_ext=on,rsc_interval=500000'
The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.
'NetRscChain' is used to save the segments of IPv4/6 in a
VirtIONet device.
A new segment becomes a 'Candidate' as well as it passed sanity check,
the main handler of TCP includes TCP window update, duplicated
ACK check and the real data coalescing.
An 'Candidate' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.
Sanity check includes:
1. Incorrect version in IP header
2. An IP options or IP fragment
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.
5. An ECN packet
Even though, there might more cases should be considered such as
ip identification other flags, while it breaks the test because
windows set it to the same even it's not a fragment.
Normally it includes 2 typical ways to handle a TCP control flag,
'bypass' and 'finalize', 'bypass' means should be sent out directly,
while 'finalize' means the packets should also be bypassed, but this
should be done after search for the same connection packets in the
pool and drain all of them out, this is to avoid out of order fragment.
All the 'SYN' packets will be bypassed since this always begin a new'
connection, other flags such 'URG/FIN/RST/CWR/ECE' will trigger a
finalization, because this normally happens upon a connection is going
to be closed, an 'URG' packet also finalize current coalescing unit.
Statistics can be used to monitor the basic coalescing status, the
'out of order' and 'out of window' means how many retransmitting packets,
thus describe the performance intuitively.
Difference between ip v4 and v6 processing:
Fragment length in ipv4 header includes itself, while it's not
included for ipv6, thus means ipv6 can carry a real 65535 payload.
Note that main goal of implementing this feature in software
is to create reference setup for certification tests. In such
setups guest migration is not required, so the coalesced packets
not yet delivered to the guest will be lost in case of migration.
Signed-off-by: Wei Xu <wexu@redhat.com>
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-01-03 15:12:29 +02:00
|
|
|
h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
|
|
|
|
if (chain->proto == ETH_P_IP) {
|
|
|
|
h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
|
|
|
|
} else {
|
|
|
|
h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
|
|
|
|
QTAILQ_REMOVE(&chain->buffers, seg, next);
|
|
|
|
g_free(seg->buf);
|
|
|
|
g_free(seg);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void virtio_net_rsc_purge(void *opq)
|
|
|
|
{
|
|
|
|
VirtioNetRscSeg *seg, *rn;
|
|
|
|
VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
|
|
|
|
|
|
|
|
QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
|
|
|
|
if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
|
|
|
|
chain->stat.purge_failed++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
chain->stat.timer++;
|
|
|
|
if (!QTAILQ_EMPTY(&chain->buffers)) {
|
|
|
|
timer_mod(chain->drain_timer,
|
|
|
|
qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Tear down all RSC state attached to @n.
 *
 * For every chain on n->rsc_chains: free each cached segment and its
 * buffer without delivering it, free the chain's drain timer, unlink the
 * chain and free it.  n->rsc_chains is empty on return.
 */
static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain;
    VirtioNetRscSeg *seg;

    /* Pop from the front until nothing is left. */
    while ((chain = QTAILQ_FIRST(&n->rsc_chains)) != NULL) {
        while ((seg = QTAILQ_FIRST(&chain->buffers)) != NULL) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}
|
|
|
|
|
|
|
|
static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
|
|
|
|
NetClientState *nc,
|
|
|
|
const uint8_t *buf, size_t size)
|
|
|
|
{
|
|
|
|
uint16_t hdr_len;
|
|
|
|
VirtioNetRscSeg *seg;
|
|
|
|
|
|
|
|
hdr_len = chain->n->guest_hdr_len;
|
|
|
|
seg = g_malloc(sizeof(VirtioNetRscSeg));
|
|
|
|
seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
|
|
|
|
+ sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
|
|
|
|
memcpy(seg->buf, buf, size);
|
|
|
|
seg->size = size;
|
|
|
|
seg->packets = 1;
|
|
|
|
seg->dup_ack = 0;
|
|
|
|
seg->is_coalesced = 0;
|
|
|
|
seg->nc = nc;
|
|
|
|
|
|
|
|
QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
|
|
|
|
chain->stat.cache++;
|
|
|
|
|
|
|
|
switch (chain->proto) {
|
|
|
|
case ETH_P_IP:
|
|
|
|
virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
|
|
|
|
break;
|
|
|
|
case ETH_P_IPV6:
|
|
|
|
virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
g_assert_not_reached();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
|
|
|
|
VirtioNetRscSeg *seg,
|
|
|
|
const uint8_t *buf,
|
|
|
|
struct tcp_header *n_tcp,
|
|
|
|
struct tcp_header *o_tcp)
|
|
|
|
{
|
|
|
|
uint32_t nack, oack;
|
|
|
|
uint16_t nwin, owin;
|
|
|
|
|
|
|
|
nack = htonl(n_tcp->th_ack);
|
|
|
|
nwin = htons(n_tcp->th_win);
|
|
|
|
oack = htonl(o_tcp->th_ack);
|
|
|
|
owin = htons(o_tcp->th_win);
|
|
|
|
|
|
|
|
if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
|
|
|
|
chain->stat.ack_out_of_win++;
|
|
|
|
return RSC_FINAL;
|
|
|
|
} else if (nack == oack) {
|
|
|
|
/* duplicated ack or window probe */
|
|
|
|
if (nwin == owin) {
|
|
|
|
/* duplicated ack, add dup ack count due to whql test up to 1 */
|
|
|
|
chain->stat.dup_ack++;
|
|
|
|
return RSC_FINAL;
|
|
|
|
} else {
|
|
|
|
/* Coalesce window update */
|
|
|
|
o_tcp->th_win = n_tcp->th_win;
|
|
|
|
chain->stat.win_update++;
|
|
|
|
return RSC_COALESCE;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* pure ack, go to 'C', finalize*/
|
|
|
|
chain->stat.pure_ack++;
|
|
|
|
return RSC_FINAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
|
|
|
|
VirtioNetRscSeg *seg,
|
|
|
|
const uint8_t *buf,
|
|
|
|
VirtioNetRscUnit *n_unit)
|
|
|
|
{
|
|
|
|
void *data;
|
|
|
|
uint16_t o_ip_len;
|
|
|
|
uint32_t nseq, oseq;
|
|
|
|
VirtioNetRscUnit *o_unit;
|
|
|
|
|
|
|
|
o_unit = &seg->unit;
|
|
|
|
o_ip_len = htons(*o_unit->ip_plen);
|
|
|
|
nseq = htonl(n_unit->tcp->th_seq);
|
|
|
|
oseq = htonl(o_unit->tcp->th_seq);
|
|
|
|
|
|
|
|
/* out of order or retransmitted. */
|
|
|
|
if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
|
|
|
|
chain->stat.data_out_of_win++;
|
|
|
|
return RSC_FINAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
|
|
|
|
if (nseq == oseq) {
|
|
|
|
if ((o_unit->payload == 0) && n_unit->payload) {
|
|
|
|
/* From no payload to payload, normal case, not a dup ack or etc */
|
|
|
|
chain->stat.data_after_pure_ack++;
|
|
|
|
goto coalesce;
|
|
|
|
} else {
|
|
|
|
return virtio_net_rsc_handle_ack(chain, seg, buf,
|
|
|
|
n_unit->tcp, o_unit->tcp);
|
|
|
|
}
|
|
|
|
} else if ((nseq - oseq) != o_unit->payload) {
|
|
|
|
/* Not a consistent packet, out of order */
|
|
|
|
chain->stat.data_out_of_order++;
|
|
|
|
return RSC_FINAL;
|
|
|
|
} else {
|
|
|
|
coalesce:
|
|
|
|
if ((o_ip_len + n_unit->payload) > chain->max_payload) {
|
|
|
|
chain->stat.over_size++;
|
|
|
|
return RSC_FINAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Here comes the right data, the payload length in v4/v6 is different,
|
|
|
|
so use the field value to update and record the new data len */
|
|
|
|
o_unit->payload += n_unit->payload; /* update new data len */
|
|
|
|
|
|
|
|
/* update field in ip header */
|
|
|
|
*o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
|
|
|
|
|
|
|
|
/* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
|
|
|
|
for windows guest, while this may change the behavior for linux
|
|
|
|
guest (only if it uses RSC feature). */
|
|
|
|
o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
|
|
|
|
|
|
|
|
o_unit->tcp->th_ack = n_unit->tcp->th_ack;
|
|
|
|
o_unit->tcp->th_win = n_unit->tcp->th_win;
|
|
|
|
|
|
|
|
memmove(seg->buf + seg->size, data, n_unit->payload);
|
|
|
|
seg->size += n_unit->payload;
|
|
|
|
seg->packets++;
|
|
|
|
chain->stat.coalesced++;
|
|
|
|
return RSC_COALESCE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
|
|
|
|
VirtioNetRscSeg *seg,
|
|
|
|
const uint8_t *buf, size_t size,
|
|
|
|
VirtioNetRscUnit *unit)
|
|
|
|
{
|
|
|
|
struct ip_header *ip1, *ip2;
|
|
|
|
|
|
|
|
ip1 = (struct ip_header *)(unit->ip);
|
|
|
|
ip2 = (struct ip_header *)(seg->unit.ip);
|
|
|
|
if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
|
|
|
|
|| (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
|
|
|
|
|| (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
|
|
|
|
chain->stat.no_match++;
|
|
|
|
return RSC_NO_MATCH;
|
|
|
|
}
|
|
|
|
|
|
|
|
return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
|
|
|
|
VirtioNetRscSeg *seg,
|
|
|
|
const uint8_t *buf, size_t size,
|
|
|
|
VirtioNetRscUnit *unit)
|
|
|
|
{
|
|
|
|
struct ip6_header *ip1, *ip2;
|
|
|
|
|
|
|
|
ip1 = (struct ip6_header *)(unit->ip);
|
|
|
|
ip2 = (struct ip6_header *)(seg->unit.ip);
|
|
|
|
if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
|
|
|
|
|| memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
|
|
|
|
|| (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
|
|
|
|
|| (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
|
|
|
|
chain->stat.no_match++;
|
|
|
|
return RSC_NO_MATCH;
|
|
|
|
}
|
|
|
|
|
|
|
|
return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Packets with 'SYN' should bypass, other flag should be sent after drain
|
|
|
|
* to prevent out of order */
|
|
|
|
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
|
|
|
|
struct tcp_header *tcp)
|
|
|
|
{
|
|
|
|
uint16_t tcp_hdr;
|
|
|
|
uint16_t tcp_flag;
|
|
|
|
|
|
|
|
tcp_flag = htons(tcp->th_offset_flags);
|
|
|
|
tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
|
|
|
|
tcp_flag &= VIRTIO_NET_TCP_FLAG;
|
|
|
|
if (tcp_flag & TH_SYN) {
|
|
|
|
chain->stat.tcp_syn++;
|
|
|
|
return RSC_BYPASS;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
|
|
|
|
chain->stat.tcp_ctrl_drain++;
|
|
|
|
return RSC_FINAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tcp_hdr > sizeof(struct tcp_header)) {
|
|
|
|
chain->stat.tcp_all_opt++;
|
|
|
|
return RSC_FINAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return RSC_CANDIDATE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
|
|
|
|
NetClientState *nc,
|
|
|
|
const uint8_t *buf, size_t size,
|
|
|
|
VirtioNetRscUnit *unit)
|
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
VirtioNetRscSeg *seg, *nseg;
|
|
|
|
|
|
|
|
if (QTAILQ_EMPTY(&chain->buffers)) {
|
|
|
|
chain->stat.empty_cache++;
|
|
|
|
virtio_net_rsc_cache_buf(chain, nc, buf, size);
|
|
|
|
timer_mod(chain->drain_timer,
|
|
|
|
qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
|
|
|
|
if (chain->proto == ETH_P_IP) {
|
|
|
|
ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
|
|
|
|
} else {
|
|
|
|
ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret == RSC_FINAL) {
|
|
|
|
if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
|
|
|
|
/* Send failed */
|
|
|
|
chain->stat.final_failed++;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Send current packet */
|
|
|
|
return virtio_net_do_receive(nc, buf, size);
|
|
|
|
} else if (ret == RSC_NO_MATCH) {
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
/* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
|
|
|
|
seg->is_coalesced = 1;
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
chain->stat.no_match_cache++;
|
|
|
|
virtio_net_rsc_cache_buf(chain, nc, buf, size);
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Drain a connection data, this is to avoid out of order segments */
|
|
|
|
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
|
|
|
|
NetClientState *nc,
|
|
|
|
const uint8_t *buf, size_t size,
|
|
|
|
uint16_t ip_start, uint16_t ip_size,
|
|
|
|
uint16_t tcp_port)
|
|
|
|
{
|
|
|
|
VirtioNetRscSeg *seg, *nseg;
|
|
|
|
uint32_t ppair1, ppair2;
|
|
|
|
|
|
|
|
ppair1 = *(uint32_t *)(buf + tcp_port);
|
|
|
|
QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
|
|
|
|
ppair2 = *(uint32_t *)(seg->buf + tcp_port);
|
|
|
|
if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
|
|
|
|
|| (ppair1 != ppair2)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
|
|
|
|
chain->stat.drain_failed++;
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return virtio_net_do_receive(nc, buf, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Decide whether an IPv4 packet may be considered for coalescing.
 *
 * Returns RSC_CANDIDATE when every check passes, RSC_BYPASS otherwise; each
 * rejection bumps the matching counter in chain->stat.
 */
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    size_t min_len, max_len;
    uint16_t total_len;

    /* Version nibble must say IPv4 */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* A header length other than the plain one means IP options: skip */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Only TCP is coalesced */
    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Packets without the DF bit are treated as fragments: skip */
    if ((htons(ip->ip_off) & IP_DF) == 0) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* ECN-marked packets are not handled */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    /*
     * The advertised total length must cover at least an IP and a TCP
     * header and must not exceed what is actually in the buffer.
     */
    total_len = htons(ip->ip_len);
    min_len = sizeof(struct ip_header) + sizeof(struct tcp_header);
    max_len = size - chain->n->guest_hdr_len - sizeof(struct eth_header);
    if (total_len < min_len || total_len > max_len) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
|
|
|
|
|
|
|
|
static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
|
|
|
|
NetClientState *nc,
|
|
|
|
const uint8_t *buf, size_t size)
|
|
|
|
{
|
|
|
|
int32_t ret;
|
|
|
|
uint16_t hdr_len;
|
|
|
|
VirtioNetRscUnit unit;
|
|
|
|
|
|
|
|
hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
|
|
|
|
|
|
|
|
if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
|
|
|
|
+ sizeof(struct tcp_header))) {
|
|
|
|
chain->stat.bypass_not_tcp++;
|
|
|
|
return virtio_net_do_receive(nc, buf, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
virtio_net_rsc_extract_unit4(chain, buf, &unit);
|
|
|
|
if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
|
|
|
|
!= RSC_CANDIDATE) {
|
|
|
|
return virtio_net_do_receive(nc, buf, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
|
|
|
|
if (ret == RSC_BYPASS) {
|
|
|
|
return virtio_net_do_receive(nc, buf, size);
|
|
|
|
} else if (ret == RSC_FINAL) {
|
|
|
|
return virtio_net_rsc_drain_flow(chain, nc, buf, size,
|
|
|
|
((hdr_len + sizeof(struct eth_header)) + 12),
|
|
|
|
VIRTIO_NET_IP4_ADDR_SIZE,
|
|
|
|
hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
|
|
|
|
}
|
|
|
|
|
|
|
|
return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Decide whether an IPv6 packet may be considered for coalescing.
 *
 * Returns RSC_CANDIDATE when every check passes and RSC_BYPASS otherwise.
 */
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    size_t max_len;
    uint16_t payload_len;

    /* Version field must say IPv6 */
    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /*
     * Requiring TCP as the next header rejects both non-TCP traffic and
     * packets carrying extension headers.
     */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /*
     * The advertised payload must hold at least a TCP header and fit in
     * what follows the IPv6 header in the buffer.
     */
    payload_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    max_len = size - chain->n->guest_hdr_len - sizeof(struct eth_header)
              - sizeof(struct ip6_header);
    if (payload_len < sizeof(struct tcp_header) || payload_len > max_len) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* ECN-marked packets are not handled */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
|
|
|
|
|
|
|
|
/*
 * RSC entry point for IPv6 packets; @opq is the VirtioNetRscChain.
 *
 * Packets that are too short, fail the sanity check or get an RSC_BYPASS
 * verdict from the TCP control check go straight to
 * virtio_net_do_receive().  An RSC_FINAL verdict drains the matching flow;
 * anything else is handed to the coalescing logic.
 */
static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    VirtioNetRscChain *chain = opq;
    VirtioNetRscUnit unit;
    uint16_t hdr_len = chain->n->guest_hdr_len;
    /* Offsets of the IPv6 and TCP headers inside buf */
    size_t ip_off = hdr_len + sizeof(struct eth_header);
    size_t tcp_off = ip_off + sizeof(struct ip6_header);
    int32_t ret;

    if (size < tcp_off + sizeof(tcp_header)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check6(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    }
    if (ret == RSC_FINAL) {
        /*
         * NOTE(review): 8 looks like the offset of the address pair within
         * the IPv6 header - confirm against struct ip6_header.
         */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                                         ip_off + 8,
                                         VIRTIO_NET_IP6_ADDR_SIZE,
                                         tcp_off);
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
|
|
|
|
|
|
|
|
static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
|
|
|
|
NetClientState *nc,
|
|
|
|
uint16_t proto)
|
|
|
|
{
|
|
|
|
VirtioNetRscChain *chain;
|
|
|
|
|
|
|
|
if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
|
|
|
|
if (chain->proto == proto) {
|
|
|
|
return chain;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
chain = g_malloc(sizeof(*chain));
|
|
|
|
chain->n = n;
|
|
|
|
chain->proto = proto;
|
|
|
|
if (proto == (uint16_t)ETH_P_IP) {
|
|
|
|
chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
|
|
|
|
chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
|
|
|
|
} else {
|
|
|
|
chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
|
|
|
|
chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
|
|
|
|
}
|
|
|
|
chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
|
|
|
|
virtio_net_rsc_purge, chain);
|
|
|
|
memset(&chain->stat, 0, sizeof(chain->stat));
|
|
|
|
|
|
|
|
QTAILQ_INIT(&chain->buffers);
|
|
|
|
QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
|
|
|
|
|
|
|
|
return chain;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Receive path used while RSC is enabled: dispatch IPv4/IPv6 packets to the
 * per-protocol coalescing code and hand everything else directly to
 * virtio_net_do_receive().
 *
 * Returns the result of whichever receive routine handled the packet.
 */
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);

    /*
     * The ethernet header is read at offset guest_hdr_len below, so that is
     * the length the buffer must cover before it can be dereferenced.
     */
    if (size < (n->guest_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}
|
|
|
|
|
|
|
|
/*
 * Receive entry point: go through the RSC path when coalescing is enabled
 * for IPv4 or IPv6, otherwise deliver the packet directly.
 */
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (!n->rsc4_enabled && !n->rsc6_enabled) {
        return virtio_net_do_receive(nc, buf, size);
    }

    return virtio_net_rsc_receive(nc, buf, size);
}
|
|
|
|
|
2013-01-30 19:12:38 +08:00
|
|
|
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
|
2009-06-18 18:21:36 +01:00
|
|
|
|
2012-07-24 16:35:13 +01:00
|
|
|
/*
 * Completion callback passed to qemu_sendv_packet_async(): return the
 * pending element to the guest, then resume flushing the TX queue.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *done = q->async_tx.elem;

    /* Hand the element that was in flight back to the guest. */
    virtqueue_push(q->tx_vq, done, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(done);
    q->async_tx.elem = NULL;

    /* Notifications were turned off while the send was pending. */
    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
|
|
|
|
|
2008-12-17 19:13:11 +00:00
|
|
|
/* TX */
|
2013-01-30 19:12:38 +08:00
|
|
|
/*
 * Pop elements from the TX virtqueue of @q and send them to the peer.
 *
 * Returns the number of packets handled (at most n->tx_burst),
 * -EBUSY when a send went asynchronous (the element is parked in
 * q->async_tx.elem and completed by virtio_net_tx_complete()), or
 * -EINVAL when the guest supplied a malformed element and the device was
 * marked broken via virtio_error().
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));

    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* A previous send is still in flight: wait for its completion. */
    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        /*
         * Must be able to hold the largest guest header.  With hash
         * reporting negotiated the guest header is larger than
         * struct virtio_net_hdr_mrg_rxbuf, so use the hash-report layout.
         */
        struct virtio_net_hdr_v1_hash vhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            /* Never copy more header bytes than the local buffer holds. */
            assert(n->guest_hdr_len <= sizeof(vhdr));
            if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /*
                 * Send a byte-swapped private copy of the header followed
                 * by the rest of the guest buffers.
                 */
                virtio_net_hdr_swap(vdev, (void *) &vhdr);
                sg2[0].iov_base = &vhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Queued by the peer: completion is signalled asynchronously. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
|
|
|
|
|
2010-09-02 09:01:10 -06:00
|
|
|
/*
 * TX virtqueue handler used for queues created in "timer" mode.
 *
 * The first kick arms the TX timer and disables notifications; a kick that
 * arrives while the timer is pending cancels it and flushes immediately.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* Link is down: discard whatever the guest queued. */
    if (unlikely(!(n->status & VIRTIO_NET_S_LINK_UP))) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (!q->tx_waiting) {
        /* Batch up: flush later from the timer callback. */
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
        return;
    }

    /* Timer already pending: cancel it and flush right now. */
    virtio_queue_set_notification(vq, 1);
    timer_del(q->tx_timer);
    q->tx_waiting = 0;
    /* Nothing more to do here whatever the flush reports. */
    virtio_net_flush_tx(q);
}
|
|
|
|
|
2010-09-02 09:01:10 -06:00
|
|
|
/*
 * TX virtqueue handler used for queues created in bottom-half mode: mark
 * the queue as waiting and schedule the TX bottom half.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* Link is down: discard whatever the guest queued. */
    if (unlikely(!(n->status & VIRTIO_NET_S_LINK_UP))) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* A flush is already pending. */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;

    /*
     * When the device was stopped but the VCPU wasn't, only record that
     * work is pending; otherwise kick the bottom half.
     */
    if (vdev->vm_running) {
        virtio_queue_set_notification(vq, 0);
        qemu_bh_schedule(q->tx_bh);
    }
}
|
|
|
|
|
2008-12-17 19:13:11 +00:00
|
|
|
static void virtio_net_tx_timer(void *opaque)
|
|
|
|
{
|
2013-01-30 19:12:38 +08:00
|
|
|
VirtIONetQueue *q = opaque;
|
|
|
|
VirtIONet *n = q->n;
|
2013-04-11 16:30:01 +02:00
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(n);
|
2014-09-02 17:26:12 +03:00
|
|
|
/* This happens when device was stopped but BH wasn't. */
|
|
|
|
if (!vdev->vm_running) {
|
|
|
|
/* Make sure tx waiting is set, so we'll run when restarted. */
|
|
|
|
assert(q->tx_waiting);
|
|
|
|
return;
|
|
|
|
}
|
2008-12-17 19:13:11 +00:00
|
|
|
|
2013-01-30 19:12:38 +08:00
|
|
|
q->tx_waiting = 0;
|
2008-12-17 19:13:11 +00:00
|
|
|
|
|
|
|
/* Just in case the driver is not ready on more */
|
2013-04-11 16:30:01 +02:00
|
|
|
if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
|
2008-12-17 19:13:11 +00:00
|
|
|
return;
|
2013-04-11 16:30:01 +02:00
|
|
|
}
|
2008-12-17 19:13:11 +00:00
|
|
|
|
2013-01-30 19:12:38 +08:00
|
|
|
virtio_queue_set_notification(q->tx_vq, 1);
|
|
|
|
virtio_net_flush_tx(q);
|
2008-12-17 19:13:11 +00:00
|
|
|
}
|
|
|
|
|
2010-09-02 09:01:10 -06:00
|
|
|
static void virtio_net_tx_bh(void *opaque)
|
|
|
|
{
|
2013-01-30 19:12:38 +08:00
|
|
|
VirtIONetQueue *q = opaque;
|
|
|
|
VirtIONet *n = q->n;
|
2013-04-11 16:30:01 +02:00
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(n);
|
2010-09-02 09:01:10 -06:00
|
|
|
int32_t ret;
|
|
|
|
|
2014-09-02 17:26:12 +03:00
|
|
|
/* This happens when device was stopped but BH wasn't. */
|
|
|
|
if (!vdev->vm_running) {
|
|
|
|
/* Make sure tx waiting is set, so we'll run when restarted. */
|
|
|
|
assert(q->tx_waiting);
|
|
|
|
return;
|
|
|
|
}
|
2010-11-22 19:52:30 +02:00
|
|
|
|
2013-01-30 19:12:38 +08:00
|
|
|
q->tx_waiting = 0;
|
2010-09-02 09:01:10 -06:00
|
|
|
|
|
|
|
/* Just in case the driver is not ready on more */
|
2013-04-11 16:30:01 +02:00
|
|
|
if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
|
2010-09-02 09:01:10 -06:00
|
|
|
return;
|
2013-04-11 16:30:01 +02:00
|
|
|
}
|
2010-09-02 09:01:10 -06:00
|
|
|
|
2013-01-30 19:12:38 +08:00
|
|
|
ret = virtio_net_flush_tx(q);
|
2016-09-30 17:13:32 +02:00
|
|
|
if (ret == -EBUSY || ret == -EINVAL) {
|
|
|
|
return; /* Notification re-enable handled by tx_complete or device
|
|
|
|
* broken */
|
2010-09-02 09:01:10 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
/* If we flush a full burst of packets, assume there are
|
|
|
|
* more coming and immediately reschedule */
|
|
|
|
if (ret >= n->tx_burst) {
|
2013-01-30 19:12:38 +08:00
|
|
|
qemu_bh_schedule(q->tx_bh);
|
|
|
|
q->tx_waiting = 1;
|
2010-09-02 09:01:10 -06:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If less than a full burst, re-enable notification and flush
|
|
|
|
* anything that may have come in while we weren't looking. If
|
|
|
|
* we find something, assume the guest is still active and reschedule */
|
2013-01-30 19:12:38 +08:00
|
|
|
virtio_queue_set_notification(q->tx_vq, 1);
|
2016-09-30 17:13:32 +02:00
|
|
|
ret = virtio_net_flush_tx(q);
|
|
|
|
if (ret == -EINVAL) {
|
|
|
|
return;
|
|
|
|
} else if (ret > 0) {
|
2013-01-30 19:12:38 +08:00
|
|
|
virtio_queue_set_notification(q->tx_vq, 0);
|
|
|
|
qemu_bh_schedule(q->tx_bh);
|
|
|
|
q->tx_waiting = 1;
|
2010-09-02 09:01:10 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-15 17:20:59 +08:00
|
|
|
/*
 * Create the RX and TX virtqueues of queue pair @index.
 *
 * The TX queue is driven by a timer when net_conf.tx is "timer" and by a
 * bottom half otherwise.
 */
static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    bool use_timer = n->net_conf.tx && !strcmp(n->net_conf.tx, "timer");

    /* RX queue is added first, then the TX queue. */
    q->rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                virtio_net_handle_rx);

    if (use_timer) {
        q->tx_vq = virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                                    virtio_net_handle_tx_timer);
        q->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   virtio_net_tx_timer, q);
    } else {
        q->tx_vq = virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                                    virtio_net_handle_tx_bh);
        q->tx_bh = qemu_bh_new(virtio_net_tx_bh, q);
    }

    q->tx_waiting = 0;
    q->n = n;
}
|
|
|
|
|
|
|
|
/*
 * Tear down queue pair @index: purge packets queued for its subqueue,
 * delete both virtqueues and release the TX timer or bottom half.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    /* Virtqueue numbers of this pair: RX is even, TX follows it. */
    int rx_idx = index * 2;
    int tx_idx = index * 2 + 1;

    qemu_purge_queued_packets(qemu_get_subqueue(n->nic, index));

    virtio_del_queue(vdev, rx_idx);

    /* A queue has either a TX timer or a TX bottom half. */
    if (q->tx_timer) {
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;

    virtio_del_queue(vdev, tx_idx);
}
|
|
|
|
|
|
|
|
/*
 * Resize the device to @new_max_queues queue pairs.
 *
 * The virtqueue count is always 2 * pairs + 1, the extra one being the
 * control queue, which is the last virtqueue.
 */
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int old_max_queues;
    int pair;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (new_num_queues == old_num_queues) {
        return;
    }

    /*
     * The ctrl vq always has to be removed and re-added when the number of
     * queues changes.  Remove it first; after that at most one of the two
     * loops below does any work.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    old_max_queues = (old_num_queues - 1) / 2;

    /* Shrinking: drop the surplus pairs. */
    for (pair = new_max_queues; pair < old_max_queues; pair++) {
        virtio_net_del_queue(n, pair);
    }

    /* Growing: create the missing pairs. */
    for (pair = old_max_queues; pair < new_max_queues; pair++) {
        virtio_net_add_queue(n, pair);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}
|
|
|
|
|
2013-04-25 15:24:23 +08:00
|
|
|
/*
 * Switch multiqueue mode on or off: record the mode, resize the device to
 * n->max_queues pairs (or a single pair) and update the queue state.
 */
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int pairs = 1;

    if (multiqueue) {
        pairs = n->max_queues;
    }

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, pairs);

    virtio_net_set_queues(n);
}
|
|
|
|
|
2017-02-03 16:06:51 +00:00
|
|
|
static int virtio_net_post_load_device(void *opaque, int version_id)
|
2014-06-24 19:19:03 +02:00
|
|
|
{
|
2017-02-03 16:06:51 +00:00
|
|
|
VirtIONet *n = opaque;
|
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(n);
|
2014-06-24 19:19:03 +02:00
|
|
|
int i, link_down;
|
2008-12-17 19:13:11 +00:00
|
|
|
|
2019-02-27 13:24:07 +00:00
|
|
|
trace_virtio_net_post_load_device();
|
2017-02-03 16:06:51 +00:00
|
|
|
virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
|
2015-08-17 11:48:29 +02:00
|
|
|
virtio_vdev_has_feature(vdev,
|
2020-05-08 15:59:31 +03:00
|
|
|
VIRTIO_F_VERSION_1),
|
|
|
|
virtio_vdev_has_feature(vdev,
|
|
|
|
VIRTIO_NET_F_HASH_REPORT));
|
2008-12-17 19:13:11 +00:00
|
|
|
|
2016-07-14 18:22:43 +01:00
|
|
|
/* MAC_TABLE_ENTRIES may be different from the saved image */
|
2017-02-03 16:06:51 +00:00
|
|
|
if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
|
2016-07-14 18:22:43 +01:00
|
|
|
n->mac_table.in_use = 0;
|
2009-02-05 22:36:28 +00:00
|
|
|
}
|
2009-10-22 17:43:50 +01:00
|
|
|
|
2017-02-03 16:06:51 +00:00
|
|
|
if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
|
2016-07-04 14:47:37 +03:00
|
|
|
n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
|
|
|
|
}
|
|
|
|
|
virtio-net: prevent offloads reset on migration
Currently offloads disabled by guest via the VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET
command are not preserved on VM migration.
Instead all offloads reported by guest features (via VIRTIO_PCI_GUEST_FEATURES)
get enabled.
What happens is: first the VirtIONet::curr_guest_offloads gets restored and offloads
are getting set correctly:
#0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=0, tso6=0, ecn=0, ufo=0) at net/net.c:474
#1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720
#2 virtio_net_post_load_device (opaque=0x555557701ca0, version_id=11) at hw/net/virtio-net.c:2334
#3 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577c80 <vmstate_virtio_net_device>, opaque=0x555557701ca0, version_id=11)
at migration/vmstate.c:168
#4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2197
#5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036
#6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 <vmstate_virtio_net>, opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143
#7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829
#8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211
#9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395
#10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467
#11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449
However later on the features are getting restored, and offloads get reset to
everything supported by features:
#0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=1, tso6=1, ecn=0, ufo=0) at net/net.c:474
#1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720
#2 virtio_net_set_features (vdev=0x555557701ca0, features=5104441767) at hw/net/virtio-net.c:773
#3 virtio_set_features_nocheck (vdev=0x555557701ca0, val=5104441767) at hw/virtio/virtio.c:2052
#4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2220
#5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036
#6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 <vmstate_virtio_net>, opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143
#7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829
#8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211
#9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395
#10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467
#11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449
Fix this by preserving the state in saved_guest_offloads field and
pushing out offload initialization to the new post load hook.
Cc: qemu-stable@nongnu.org
Signed-off-by: Mikhail Sennikovsky <mikhail.sennikovskii@cloud.ionos.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2019-10-11 15:58:04 +02:00
|
|
|
/*
|
|
|
|
* curr_guest_offloads will be later overwritten by the
|
|
|
|
* virtio_set_features_nocheck call done from the virtio_load.
|
|
|
|
* Here we make sure it is preserved and restored accordingly
|
|
|
|
* in the virtio_net_post_load_virtio callback.
|
|
|
|
*/
|
|
|
|
n->saved_guest_offloads = n->curr_guest_offloads;
|
2016-07-04 14:47:37 +03:00
|
|
|
|
2013-01-30 19:12:40 +08:00
|
|
|
virtio_net_set_queues(n);
|
|
|
|
|
2009-06-05 14:47:13 -06:00
|
|
|
/* Find the first multicast entry in the saved MAC filter */
|
|
|
|
for (i = 0; i < n->mac_table.in_use; i++) {
|
|
|
|
if (n->mac_table.macs[i * ETH_ALEN] & 1) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
n->mac_table.first_multi = i;
|
2012-09-28 10:06:02 +08:00
|
|
|
|
|
|
|
/* nc.link_down can't be migrated, so infer link_down according
|
|
|
|
* to link status bit in n->status */
|
2013-01-30 19:12:40 +08:00
|
|
|
link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
|
|
|
|
for (i = 0; i < n->max_queues; i++) {
|
|
|
|
qemu_get_subqueue(n->nic, i)->link_down = link_down;
|
|
|
|
}
|
2012-09-28 10:06:02 +08:00
|
|
|
|
2016-07-04 14:47:37 +03:00
|
|
|
if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
|
|
|
|
virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
|
2019-02-27 13:24:07 +00:00
|
|
|
qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
|
|
|
|
QEMU_CLOCK_VIRTUAL,
|
|
|
|
virtio_net_announce_timer, n);
|
|
|
|
if (n->announce_timer.round) {
|
|
|
|
timer_mod(n->announce_timer.tm,
|
|
|
|
qemu_clock_get_ms(n->announce_timer.type));
|
|
|
|
} else {
|
2019-06-20 19:47:04 +01:00
|
|
|
qemu_announce_timer_del(&n->announce_timer, false);
|
2019-02-27 13:24:07 +00:00
|
|
|
}
|
2016-07-04 14:47:37 +03:00
|
|
|
}
|
|
|
|
|
2020-05-08 15:59:33 +03:00
|
|
|
if (n->rss_data.enabled) {
|
2021-05-14 14:48:33 +03:00
|
|
|
n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
|
|
|
|
if (!n->rss_data.populate_hash) {
|
|
|
|
if (!virtio_net_attach_epbf_rss(n)) {
|
|
|
|
if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
|
|
|
|
warn_report("Can't post-load eBPF RSS for vhost");
|
|
|
|
} else {
|
|
|
|
warn_report("Can't post-load eBPF RSS - "
|
|
|
|
"fallback to software RSS");
|
|
|
|
n->rss_data.enabled_software_rss = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-08 15:59:33 +03:00
|
|
|
trace_virtio_net_rss_enable(n->rss_data.hash_types,
|
|
|
|
n->rss_data.indirections_len,
|
|
|
|
sizeof(n->rss_data.key));
|
|
|
|
} else {
|
|
|
|
trace_virtio_net_rss_disable();
|
|
|
|
}
|
2008-12-17 19:13:11 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
virtio-net: prevent offloads reset on migration
Currently offloads disabled by guest via the VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET
command are not preserved on VM migration.
Instead all offloads reported by guest features (via VIRTIO_PCI_GUEST_FEATURES)
get enabled.
What happens is: first the VirtIONet::curr_guest_offloads gets restored and offloads
are getting set correctly:
#0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=0, tso6=0, ecn=0, ufo=0) at net/net.c:474
#1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720
#2 virtio_net_post_load_device (opaque=0x555557701ca0, version_id=11) at hw/net/virtio-net.c:2334
#3 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577c80 <vmstate_virtio_net_device>, opaque=0x555557701ca0, version_id=11)
at migration/vmstate.c:168
#4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2197
#5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036
#6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 <vmstate_virtio_net>, opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143
#7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829
#8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211
#9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395
#10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467
#11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449
However later on the features are getting restored, and offloads get reset to
everything supported by features:
#0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=1, tso6=1, ecn=0, ufo=0) at net/net.c:474
#1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720
#2 virtio_net_set_features (vdev=0x555557701ca0, features=5104441767) at hw/net/virtio-net.c:773
#3 virtio_set_features_nocheck (vdev=0x555557701ca0, val=5104441767) at hw/virtio/virtio.c:2052
#4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2220
#5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036
#6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 <vmstate_virtio_net>, opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143
#7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829
#8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211
#9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395
#10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467
#11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449
Fix this by preserving the state in saved_guest_offloads field and
pushing out offload initialization to the new post load hook.
Cc: qemu-stable@nongnu.org
Signed-off-by: Mikhail Sennikovsky <mikhail.sennikovskii@cloud.ionos.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
2019-10-11 15:58:04 +02:00
|
|
|
/*
 * Post-load hook taking the VirtIODevice: copies saved_guest_offloads back
 * into curr_guest_offloads and, if the peer has a vnet header, applies the
 * resulting offloads.  Always returns 0.
 */
static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *net = VIRTIO_NET(vdev);

    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    net->curr_guest_offloads = net->saved_guest_offloads;

    if (!peer_has_vnet_hdr(net)) {
        /* Nothing to apply without a vnet header on the peer. */
        return 0;
    }

    virtio_net_apply_guest_offloads(net);
    return 0;
}
|
|
|
|
|
2017-02-03 16:06:51 +00:00
|
|
|
/*
 * Migration description for a single VirtIONetQueue: only its 32-bit
 * tx_waiting field is part of the stream.
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
|
|
|
|
|
|
|
|
static bool max_queues_gt_1(void *opaque, int version_id)
|
|
|
|
{
|
|
|
|
return VIRTIO_NET(opaque)->max_queues > 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool has_ctrl_guest_offloads(void *opaque, int version_id)
|
|
|
|
{
|
|
|
|
return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
|
|
|
|
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool mac_table_fits(void *opaque, int version_id)
|
|
|
|
{
|
|
|
|
return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Field test: logical negation of mac_table_fits() for the same arguments. */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    bool fits = mac_table_fits(opaque, version_id);

    return !fits;
}
|
|
|
|
|
|
|
|
/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 *
 * NOTE(review): the WITH_TMP mechanism presumably relies on 'parent' being
 * the first member - confirm against the vmstate documentation before
 * reordering.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;        /* device whose state is being migrated */
    VirtIONetQueue *vqs_1;         /* parent->vqs + 1, set by the tx_waiting pre_save */
    uint16_t        curr_queues_1; /* parent->curr_queues - 1, or 0 when there are none */
    uint8_t         has_ufo;       /* copy of parent->has_ufo on save; checked on load */
    uint32_t        has_vnet_hdr;  /* copy of parent->has_vnet_hdr on save; checked on load */
};
|
|
|
|
|
|
|
|
/* The 2nd and subsequent tx_waiting flags are loaded later than
|
|
|
|
* the 1st entry in the queues and only if there's more than one
|
|
|
|
* entry. We use the tmp mechanism to calculate a temporary
|
|
|
|
* pointer and count and also validate the count.
|
|
|
|
*/
|
|
|
|
|
2017-09-25 12:29:12 +01:00
|
|
|
static int virtio_net_tx_waiting_pre_save(void *opaque)
|
2017-02-03 16:06:51 +00:00
|
|
|
{
|
|
|
|
struct VirtIONetMigTmp *tmp = opaque;
|
|
|
|
|
|
|
|
tmp->vqs_1 = tmp->parent->vqs + 1;
|
|
|
|
tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
|
|
|
|
if (tmp->parent->curr_queues == 0) {
|
|
|
|
tmp->curr_queues_1 = 0;
|
|
|
|
}
|
2017-09-25 12:29:12 +01:00
|
|
|
|
|
|
|
return 0;
|
2017-02-03 16:06:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int virtio_net_tx_waiting_pre_load(void *opaque)
|
|
|
|
{
|
|
|
|
struct VirtIONetMigTmp *tmp = opaque;
|
|
|
|
|
|
|
|
/* Reuse the pointer setup from save */
|
|
|
|
virtio_net_tx_waiting_pre_save(opaque);
|
|
|
|
|
|
|
|
if (tmp->parent->curr_queues > tmp->parent->max_queues) {
|
|
|
|
error_report("virtio-net: curr_queues %x > max_queues %x",
|
|
|
|
tmp->parent->curr_queues, tmp->parent->max_queues);
|
|
|
|
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0; /* all good */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Temporary-based description for the tx_waiting state of the queues after
 * the first one: a variable-length array starting at vqs_1 with
 * curr_queues_1 elements, each described by
 * vmstate_virtio_net_queue_tx_waiting.  The pre_load/pre_save hooks compute
 * vqs_1 and curr_queues_1.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load = virtio_net_tx_waiting_pre_load,
    .pre_save = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queues_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
|
|
|
|
|
|
|
|
/* the 'has_ufo' flag is just tested; if the incoming stream has the
|
|
|
|
* flag set we need to check that we have it
|
|
|
|
*/
|
|
|
|
static int virtio_net_ufo_post_load(void *opaque, int version_id)
|
|
|
|
{
|
|
|
|
struct VirtIONetMigTmp *tmp = opaque;
|
|
|
|
|
|
|
|
if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
|
|
|
|
error_report("virtio-net: saved image requires TUN_F_UFO support");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-09-25 12:29:12 +01:00
|
|
|
static int virtio_net_ufo_pre_save(void *opaque)
|
2017-02-03 16:06:51 +00:00
|
|
|
{
|
|
|
|
struct VirtIONetMigTmp *tmp = opaque;
|
|
|
|
|
|
|
|
tmp->has_ufo = tmp->parent->has_ufo;
|
2017-09-25 12:29:12 +01:00
|
|
|
|
|
|
|
return 0;
|
2017-02-03 16:06:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Temporary-based description carrying the 8-bit has_ufo flag: filled from
 * the device by the pre_save hook and checked against the peer by the
 * post_load hook.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
|
|
|
|
|
|
|
|
/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
|
|
|
|
* flag set we need to check that we have it
|
|
|
|
*/
|
|
|
|
static int virtio_net_vnet_post_load(void *opaque, int version_id)
|
|
|
|
{
|
|
|
|
struct VirtIONetMigTmp *tmp = opaque;
|
|
|
|
|
|
|
|
if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
|
|
|
|
error_report("virtio-net: saved image requires vnet_hdr=on");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-09-25 12:29:12 +01:00
|
|
|
static int virtio_net_vnet_pre_save(void *opaque)
|
2017-02-03 16:06:51 +00:00
|
|
|
{
|
|
|
|
struct VirtIONetMigTmp *tmp = opaque;
|
|
|
|
|
|
|
|
tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
|
2017-09-25 12:29:12 +01:00
|
|
|
|
|
|
|
return 0;
|
2017-02-03 16:06:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Temporary-based description carrying the 32-bit has_vnet_hdr flag: filled
 * from the device by the pre_save hook and checked against the peer by the
 * post_load hook.
 */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
|
|
|
|
|
2020-05-08 15:59:33 +03:00
|
|
|
static bool virtio_net_rss_needed(void *opaque)
|
|
|
|
{
|
|
|
|
return VIRTIO_NET(opaque)->rss_data.enabled;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * RSS subsection of the device state, gated by virtio_net_rss_needed().
 * It carries the rss_data flags, hash types, default queue, key and the
 * indirection table.
 */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        /* indirections_len precedes the table because it sizes the varray below */
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
|
|
|
|
|
2017-02-03 16:06:51 +00:00
|
|
|
/*
 * Device-specific migration state of virtio-net.  The entries of .fields
 * are listed in stream order, so they must not be reordered.  The RSS
 * state is an optional subsection.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of the first queue; the remaining queues follow later */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         *  sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* max_queues and curr_queues are only present when max_queues > 1 */
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
|
|
|
|
|
2009-11-25 18:49:11 +00:00
|
|
|
static NetClientInfo net_virtio_info = {
|
qapi: Change Netdev into a flat union
This is a mostly-mechanical conversion that creates a new flat
union 'Netdev' QAPI type that covers all the branches of the
former 'NetClientOptions' simple union, where the branches are
now listed in a new 'NetClientDriver' enum rather than generated
from the simple union. The existence of a flat union has no
change to the command line syntax accepted for new code, and
will make it possible for a future patch to switch the QMP
command to parse a boxed union for no change to valid QMP; but
it does have some ripple effect on the C code when dealing with
the new types.
While making the conversion, note that the 'NetLegacy' type
remains unchanged: it applies only to legacy command line options,
and will not be ported to QMP, so it should remain a wrapper
around a simple union; to avoid confusion, the type named
'NetClientOptions' is now gone, and we introduce 'NetLegacyOptions'
in its place. Then, in the C code, we convert from NetLegacy to
Netdev as soon as possible, so that the bulk of the net stack
only has to deal with one QAPI type, not two. Note that since
the old legacy code always rejected 'hubport', we can just omit
that branch from the new 'NetLegacyOptions' simple union.
Based on an idea originally by Zoltán Kővágó <DirtY.iCE.hu@gmail.com>:
Message-Id: <01a527fbf1a5de880091f98cf011616a78adeeee.1441627176.git.DirtY.iCE.hu@gmail.com>
although the sed script in that patch no longer applies due to
other changes in the tree since then, and I also did some manual
cleanups (such as fixing whitespace to keep checkpatch happy).
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <1468468228-27827-13-git-send-email-eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Fixup from Eric squashed in]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
2016-07-13 21:50:23 -06:00
|
|
|
.type = NET_CLIENT_DRIVER_NIC,
|
2009-11-25 18:49:11 +00:00
|
|
|
.size = sizeof(NICState),
|
|
|
|
.can_receive = virtio_net_can_receive,
|
|
|
|
.receive = virtio_net_receive,
|
|
|
|
.link_status_changed = virtio_net_set_link_status,
|
net: add support of mac-programming over macvtap in QEMU side
Currently macvtap based macvlan device is working in promiscuous
mode, we want to implement mac-programming over macvtap through
Libvirt for better performance.
Design:
QEMU notifies Libvirt when rx-filter config is changed in guest,
then Libvirt query the rx-filter information by a monitor command,
and sync the change to macvtap device. Related rx-filter config
of the nic contains main mac, rx-mode items and vlan table.
This patch adds a QMP event to notify management of rx-filter change,
and adds a monitor command for management to query rx-filter
information.
Test:
If we repeatedly add/remove vlan, and change macaddr of vlan
interfaces in guest by a loop script.
Result:
The events will flood the QMP client(management), management takes
too much resource to process the events.
Event_throttle API (set rate to 1 ms) can avoid the events to flood
QMP client, but it could cause an unexpected delay (~1ms), and guests
normally expect rx-filter updates immediately.
So we use a flag for each nic to avoid event flooding: the event
is emitted only once until the query command is executed. The flag
implementation does not introduce an unexpected delay.
There may be an uncontrollable delay if we let Libvirt do the
real change, since guests normally expect rx-filter updates immediately.
But it's another separate issue, we can investigate it when the
work in Libvirt side is done.
Michael S. Tsirkin: tweaked to enable events on start
Michael S. Tsirkin: fixed not to crash when no id
Michael S. Tsirkin: fold in patch:
"additional fixes for mac-programming feature"
Amos Kong: always notify QMP client if mactable is changed
Amos Kong: return NULL list if no net client supports rx-filter query
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-14 15:45:52 +08:00
|
|
|
.query_rx_filter = virtio_net_query_rxfilter,
|
2019-02-27 13:24:10 +00:00
|
|
|
.announce = virtio_net_announce,
|
2009-11-25 18:49:11 +00:00
|
|
|
};
|
|
|
|
|
2012-12-24 17:37:01 +02:00
|
|
|
/*
 * Report whether virtqueue @idx has a pending guest notification, as
 * answered by the vhost backend of the matching subqueue's peer.
 * Asserts that vhost has been started.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *net = VIRTIO_NET(vdev);
    NetClientState *queue_nc = qemu_get_subqueue(net->nic, vq2q(idx));

    assert(net->vhost_started);

    return vhost_net_virtqueue_pending(get_vhost_net(queue_nc->peer), idx);
}
|
|
|
|
|
|
|
|
/*
 * Forward a mask/unmask request for the guest notifier of virtqueue @idx to
 * the vhost backend of the matching subqueue's peer.
 * Asserts that vhost has been started.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *net = VIRTIO_NET(vdev);
    NetClientState *queue_nc = qemu_get_subqueue(net->nic, vq2q(idx));

    assert(net->vhost_started);

    vhost_net_virtqueue_mask(get_vhost_net(queue_nc->peer), vdev, idx, mask);
}
|
|
|
|
|
2015-06-01 10:45:40 +02:00
|
|
|
/*
 * Compute n->config_size from @host_features.  VIRTIO_NET_F_MAC is added
 * to the local copy of the feature mask before the lookup in
 * feature_sizes; the caller's value is not modified.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    uint64_t features = host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes, features);
}
|
|
|
|
|
2013-05-15 14:12:49 +02:00
|
|
|
/*
 * Set the net client name and type stored in @n.
 *
 * @name: new client name; may be NULL, in which case the netclient name
 *        will be type.x.
 * @type: new client type; must not be NULL (asserted).
 *
 * Both strings are copied.  The copies are made before the previously
 * stored strings are released, so it is safe to pass n->netclient_name or
 * n->netclient_type themselves as arguments.
 */
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    char *new_name;
    char *new_type;

    /*
     * The name can be NULL, the netclient name will be type.x.
     */
    assert(type != NULL);

    /* Duplicate first: @name/@type may alias the strings freed below. */
    new_name = g_strdup(name);
    new_type = g_strdup(type);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = new_name;
    n->netclient_type = new_type;
}
|
|
|
|
|
2020-11-18 09:37:46 +01:00
|
|
|
/*
 * Ask the hotplug handler of @dev to unplug it, marking the PCI device as
 * partially hotplugged beforehand.
 *
 * Returns true when the unplug request was issued without error; false
 * when @dev has no hotplug handler or the request failed (the error is
 * reported here).
 */
static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
{
    Error *local_err = NULL;
    PCIDevice *pci;
    HotplugHandler *handler = qdev_get_hotplug_handler(dev);

    if (!handler) {
        return false;
    }

    pci = PCI_DEVICE(dev);
    pci->partially_hotplugged = true;

    hotplug_handler_unplug_request(handler, dev, &local_err);
    if (local_err) {
        error_report_err(local_err);
        return false;
    }

    return true;
}
|
|
|
|
|
2020-11-18 09:37:46 +01:00
|
|
|
static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
|
|
|
|
Error **errp)
|
2019-10-29 12:49:04 +01:00
|
|
|
{
|
2019-11-30 20:42:21 +01:00
|
|
|
Error *err = NULL;
|
2019-10-29 12:49:04 +01:00
|
|
|
HotplugHandler *hotplug_ctrl;
|
2020-11-18 09:37:46 +01:00
|
|
|
PCIDevice *pdev = PCI_DEVICE(dev);
|
2020-11-18 09:37:25 +01:00
|
|
|
BusState *primary_bus;
|
2019-10-29 12:49:04 +01:00
|
|
|
|
|
|
|
if (!pdev->partially_hotplugged) {
|
|
|
|
return true;
|
|
|
|
}
|
2020-11-18 09:37:46 +01:00
|
|
|
primary_bus = dev->parent_bus;
|
2020-11-18 09:37:25 +01:00
|
|
|
if (!primary_bus) {
|
2019-11-20 16:49:50 +01:00
|
|
|
error_setg(errp, "virtio_net: couldn't find primary bus");
|
2019-11-30 20:42:21 +01:00
|
|
|
return false;
|
2019-10-29 12:49:04 +01:00
|
|
|
}
|
2020-11-18 09:37:46 +01:00
|
|
|
qdev_set_parent_bus(dev, primary_bus, &error_abort);
|
2020-11-18 09:37:29 +01:00
|
|
|
qatomic_set(&n->failover_primary_hidden, false);
|
2020-11-18 09:37:46 +01:00
|
|
|
hotplug_ctrl = qdev_get_hotplug_handler(dev);
|
2019-11-20 16:49:50 +01:00
|
|
|
if (hotplug_ctrl) {
|
2020-11-18 09:37:46 +01:00
|
|
|
hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
|
2019-11-30 20:42:21 +01:00
|
|
|
if (err) {
|
|
|
|
goto out;
|
|
|
|
}
|
2020-11-18 09:37:46 +01:00
|
|
|
hotplug_handler_plug(hotplug_ctrl, dev, &err);
|
2019-11-20 16:49:50 +01:00
|
|
|
}
|
2021-06-29 17:29:37 +02:00
|
|
|
pdev->partially_hotplugged = false;
|
2019-11-20 16:49:50 +01:00
|
|
|
|
|
|
|
out:
|
2019-11-30 20:42:21 +01:00
|
|
|
error_propagate(errp, err);
|
|
|
|
return !err;
|
2019-10-29 12:49:04 +01:00
|
|
|
}
|
|
|
|
|
2020-11-18 09:37:47 +01:00
|
|
|
/*
 * React to a migration state change for the failover primary device of @n.
 *
 * Does nothing when no primary device is found.  During migration setup,
 * if the primary is not already hidden, it is unplugged, its vmstate is
 * unregistered, an unplug-primary event is sent and the device is marked
 * hidden.  When the migration has failed, the primary is plugged back.
 */
static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
{
    bool hidden;
    Error *local_err = NULL;
    DeviceState *primary = failover_find_primary_device(n);

    if (!primary) {
        return;
    }

    hidden = qatomic_read(&n->failover_primary_hidden);

    if (migration_in_setup(s) && !hidden) {
        if (!failover_unplug_primary(n, primary)) {
            warn_report("couldn't unplug primary device");
            return;
        }
        vmstate_unregister(VMSTATE_IF(primary), qdev_get_vmsd(primary),
                           primary);
        qapi_event_send_unplug_primary(primary->id);
        qatomic_set(&n->failover_primary_hidden, true);
        return;
    }

    if (!migration_has_failed(s)) {
        return;
    }

    /* We already unplugged the device let's plug it back */
    if (!failover_replug_primary(n, primary, &local_err) && local_err) {
        error_report_err(local_err);
    }
}
|
|
|
|
|
|
|
|
/*
 * Migration state notifier callback: recover the VirtIONet that embeds
 * @notifier and hand the MigrationState in @data to the failover handling.
 */
static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
{
    VirtIONet *net = container_of(notifier, VirtIONet, migration_state);
    MigrationState *state = data;

    virtio_net_handle_migration_primary(net, state);
}
|
|
|
|
|
2020-11-18 09:37:37 +01:00
|
|
|
static bool failover_hide_primary_device(DeviceListener *listener,
|
|
|
|
QemuOpts *device_opts)
|
2019-10-29 12:49:04 +01:00
|
|
|
{
|
|
|
|
VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
|
2020-11-18 09:37:32 +01:00
|
|
|
const char *standby_id;
|
2019-10-29 12:49:04 +01:00
|
|
|
|
2019-11-20 16:49:51 +01:00
|
|
|
if (!device_opts) {
|
2020-11-18 09:37:36 +01:00
|
|
|
return false;
|
2019-11-20 16:49:51 +01:00
|
|
|
}
|
2020-11-18 09:37:32 +01:00
|
|
|
standby_id = qemu_opt_get(device_opts, "failover_pair_id");
|
2020-11-18 09:37:36 +01:00
|
|
|
if (g_strcmp0(standby_id, n->netclient_name) != 0) {
|
|
|
|
return false;
|
2019-10-29 12:49:04 +01:00
|
|
|
}
|
|
|
|
|
2020-11-18 09:37:29 +01:00
|
|
|
/* failover_primary_hidden is set during feature negotiation */
|
2020-11-18 09:37:45 +01:00
|
|
|
return qatomic_read(&n->failover_primary_hidden);
|
2019-10-29 12:49:04 +01:00
|
|
|
}
|
|
|
|
|
2013-07-30 02:36:06 +02:00
|
|
|
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
|
2013-04-11 16:29:57 +02:00
|
|
|
{
|
2013-07-30 02:36:06 +02:00
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
|
2013-07-30 05:02:48 +02:00
|
|
|
VirtIONet *n = VIRTIO_NET(dev);
|
net: add support of mac-programming over macvtap in QEMU side
Currently macvtap based macvlan device is working in promiscuous
mode, we want to implement mac-programming over macvtap through
Libvirt for better performance.
Design:
QEMU notifies Libvirt when rx-filter config is changed in guest,
then Libvirt query the rx-filter information by a monitor command,
and sync the change to macvtap device. Related rx-filter config
of the nic contains main mac, rx-mode items and vlan table.
This patch adds a QMP event to notify management of rx-filter change,
and adds a monitor command for management to query rx-filter
information.
Test:
If we repeatedly add/remove vlan, and change macaddr of vlan
interfaces in guest by a loop script.
Result:
The events will flood the QMP client(management), management takes
too much resource to process the events.
Event_throttle API (set rate to 1 ms) can avoid the events to flood
QMP client, but it could cause an unexpected delay (~1ms), and guests
normally expect rx-filter updates immediately.
So we use a flag for each nic to avoid event flooding: the event
is emitted only once until the query command is executed. The flag
implementation does not introduce an unexpected delay.
There may be an uncontrollable delay if we let Libvirt do the
real change, since guests normally expect rx-filter updates immediately.
But it's another separate issue, we can investigate it when the
work in Libvirt side is done.
Michael S. Tsirkin: tweaked to enable events on start
Michael S. Tsirkin: fixed not to crash when no id
Michael S. Tsirkin: fold in patch:
"additional fixes for mac-programming feature"
Amos Kong: always notify QMP client if mactable is changed
Amos Kong: return NULL list if no net client supports rx-filter query
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-14 15:45:52 +08:00
|
|
|
NetClientState *nc;
|
2013-07-30 05:02:48 +02:00
|
|
|
int i;
|
2013-04-11 16:30:02 +02:00
|
|
|
|
2016-12-10 16:30:38 +01:00
|
|
|
if (n->net_conf.mtu) {
|
2018-03-07 22:25:40 -05:00
|
|
|
n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
|
2016-12-10 16:30:38 +01:00
|
|
|
}
|
|
|
|
|
2018-03-07 22:25:41 -05:00
|
|
|
if (n->net_conf.duplex_str) {
|
|
|
|
if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
|
|
|
|
n->net_conf.duplex = DUPLEX_HALF;
|
|
|
|
} else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
|
|
|
|
n->net_conf.duplex = DUPLEX_FULL;
|
|
|
|
} else {
|
|
|
|
error_setg(errp, "'duplex' must be 'half' or 'full'");
|
2020-04-22 15:07:13 +02:00
|
|
|
return;
|
2018-03-07 22:25:41 -05:00
|
|
|
}
|
|
|
|
n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
|
|
|
|
} else {
|
|
|
|
n->net_conf.duplex = DUPLEX_UNKNOWN;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (n->net_conf.speed < SPEED_UNKNOWN) {
|
|
|
|
error_setg(errp, "'speed' must be between 0 and INT_MAX");
|
2020-04-22 15:07:13 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (n->net_conf.speed >= 0) {
|
2018-03-07 22:25:41 -05:00
|
|
|
n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
|
|
|
|
}
|
|
|
|
|
2019-10-29 12:49:04 +01:00
|
|
|
if (n->failover) {
|
2020-11-18 09:37:37 +01:00
|
|
|
n->primary_listener.hide_device = failover_hide_primary_device;
|
2020-11-18 09:37:29 +01:00
|
|
|
qatomic_set(&n->failover_primary_hidden, true);
|
2019-10-29 12:49:04 +01:00
|
|
|
device_listener_register(&n->primary_listener);
|
|
|
|
n->migration_state.notify = virtio_net_migration_state_notifier;
|
|
|
|
add_migration_state_change_notifier(&n->migration_state);
|
|
|
|
n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
|
|
|
|
}
|
|
|
|
|
2015-04-28 19:51:12 +08:00
|
|
|
virtio_net_set_config_size(n, n->host_features);
|
2013-07-30 05:02:48 +02:00
|
|
|
virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
|
2008-12-17 19:13:11 +00:00
|
|
|
|
2016-08-10 17:47:16 +03:00
|
|
|
/*
|
|
|
|
* We set a lower limit on RX queue size to what it always was.
|
|
|
|
* Guests that want a smaller ring can always resize it without
|
|
|
|
* help from us (using virtio 1 and up).
|
|
|
|
*/
|
|
|
|
if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
|
|
|
|
n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
|
2017-07-13 09:44:38 +02:00
|
|
|
!is_power_of_2(n->net_conf.rx_queue_size)) {
|
2016-08-10 17:47:16 +03:00
|
|
|
error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
|
|
|
|
"must be a power of 2 between %d and %d.",
|
|
|
|
n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
|
|
|
|
VIRTQUEUE_MAX_SIZE);
|
|
|
|
virtio_cleanup(vdev);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-06-28 10:37:59 +08:00
|
|
|
if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
|
|
|
|
n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
|
|
|
|
!is_power_of_2(n->net_conf.tx_queue_size)) {
|
|
|
|
error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
|
|
|
|
"must be a power of 2 between %d and %d",
|
|
|
|
n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
|
|
|
|
VIRTQUEUE_MAX_SIZE);
|
|
|
|
virtio_cleanup(vdev);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-05-26 12:04:08 +02:00
|
|
|
n->max_queues = MAX(n->nic_conf.peers.queues, 1);
|
2015-05-29 14:15:31 +08:00
|
|
|
if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
|
2015-03-20 14:07:50 +08:00
|
|
|
error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
|
2015-04-09 20:32:39 +02:00
|
|
|
"must be a positive integer less than %d.",
|
2015-05-29 14:15:31 +08:00
|
|
|
n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
|
2015-03-20 14:07:50 +08:00
|
|
|
virtio_cleanup(vdev);
|
|
|
|
return;
|
|
|
|
}
|
2013-02-22 23:15:06 +08:00
|
|
|
n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
|
2013-01-30 19:12:39 +08:00
|
|
|
n->curr_queues = 1;
|
2013-04-11 16:30:02 +02:00
|
|
|
n->tx_timeout = n->net_conf.txtimer;
|
2010-09-02 09:01:10 -06:00
|
|
|
|
2013-04-11 16:30:02 +02:00
|
|
|
if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
|
|
|
|
&& strcmp(n->net_conf.tx, "bh")) {
|
2018-10-17 10:26:28 +02:00
|
|
|
warn_report("virtio-net: "
|
|
|
|
"Unknown option tx=%s, valid options: \"timer\" \"bh\"",
|
|
|
|
n->net_conf.tx);
|
|
|
|
error_printf("Defaulting to \"bh\"");
|
2010-09-02 09:01:10 -06:00
|
|
|
}
|
|
|
|
|
2017-07-03 22:25:24 +03:00
|
|
|
n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
|
|
|
|
n->net_conf.tx_queue_size);
|
2017-06-28 10:37:59 +08:00
|
|
|
|
2015-05-29 14:15:24 +08:00
|
|
|
for (i = 0; i < n->max_queues; i++) {
|
2015-07-15 17:20:59 +08:00
|
|
|
virtio_net_add_queue(n, i);
|
2010-09-02 09:01:10 -06:00
|
|
|
}
|
2015-05-29 14:15:24 +08:00
|
|
|
|
2013-04-11 16:30:01 +02:00
|
|
|
n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
|
2013-04-11 16:30:02 +02:00
|
|
|
qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
|
|
|
|
memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
|
2009-01-08 19:46:33 +00:00
|
|
|
n->status = VIRTIO_NET_S_LINK_UP;
|
2019-02-27 13:24:07 +00:00
|
|
|
qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
|
|
|
|
QEMU_CLOCK_VIRTUAL,
|
|
|
|
virtio_net_announce_timer, n);
|
2019-02-27 13:24:10 +00:00
|
|
|
n->announce_timer.round = 0;
|
2008-12-17 19:13:11 +00:00
|
|
|
|
2013-05-15 14:12:49 +02:00
|
|
|
if (n->netclient_type) {
|
|
|
|
/*
|
|
|
|
* Happen when virtio_net_set_netclient_name has been called.
|
|
|
|
*/
|
|
|
|
n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
|
|
|
|
n->netclient_type, n->netclient_name, n);
|
|
|
|
} else {
|
|
|
|
n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
|
2013-07-30 05:02:48 +02:00
|
|
|
object_get_typename(OBJECT(dev)), dev->id, n);
|
2013-05-15 14:12:49 +02:00
|
|
|
}
|
|
|
|
|
2021-03-17 14:26:30 +08:00
|
|
|
for (i = 0; i < n->max_queues; i++) {
|
|
|
|
n->nic->ncs[i].do_not_pad = true;
|
|
|
|
}
|
|
|
|
|
2012-09-24 17:04:21 +02:00
|
|
|
peer_test_vnet_hdr(n);
|
|
|
|
if (peer_has_vnet_hdr(n)) {
|
2013-01-30 19:12:39 +08:00
|
|
|
for (i = 0; i < n->max_queues; i++) {
|
2014-02-20 12:14:07 +01:00
|
|
|
qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
|
2013-01-30 19:12:39 +08:00
|
|
|
}
|
2012-09-24 17:04:21 +02:00
|
|
|
n->host_hdr_len = sizeof(struct virtio_net_hdr);
|
|
|
|
} else {
|
|
|
|
n->host_hdr_len = 0;
|
|
|
|
}
|
2009-11-25 18:49:11 +00:00
|
|
|
|
2013-04-11 16:30:02 +02:00
|
|
|
qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
|
2009-01-07 17:47:15 +00:00
|
|
|
|
2013-01-30 19:12:39 +08:00
|
|
|
n->vqs[0].tx_waiting = 0;
|
2013-04-11 16:30:02 +02:00
|
|
|
n->tx_burst = n->net_conf.txburst;
|
2020-05-08 15:59:31 +03:00
|
|
|
virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
|
2009-02-05 22:36:20 +00:00
|
|
|
n->promisc = 1; /* for compatibility */
|
2008-12-17 19:13:11 +00:00
|
|
|
|
2011-08-20 22:09:37 -05:00
|
|
|
n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
|
2009-02-05 22:36:28 +00:00
|
|
|
|
2011-08-20 22:09:37 -05:00
|
|
|
n->vlans = g_malloc0(MAX_VLAN >> 3);
|
2009-02-05 22:36:32 +00:00
|
|
|
|
net: add support of mac-programming over macvtap in QEMU side
Currently macvtap based macvlan device is working in promiscuous
mode, we want to implement mac-programming over macvtap through
Libvirt for better performance.
Design:
QEMU notifies Libvirt when rx-filter config is changed in guest,
then Libvirt query the rx-filter information by a monitor command,
and sync the change to macvtap device. Related rx-filter config
of the nic contains main mac, rx-mode items and vlan table.
This patch adds a QMP event to notify management of rx-filter change,
and adds a monitor command for management to query rx-filter
information.
Test:
If we repeatedly add/remove vlan, and change macaddr of vlan
interfaces in guest by a loop script.
Result:
The events will flood the QMP client(management), management takes
too much resource to process the events.
Event_throttle API (set rate to 1 ms) can avoid the events to flood
QMP client, but it could cause an unexpected delay (~1ms), and guests
normally expect rx-filter updates immediately.
So we use a flag for each nic to avoid event flooding: the event
is emitted only once until the query command is executed. The flag
implementation does not introduce an unexpected delay.
There may be an uncontrollable delay if we let Libvirt do the
real change, since guests normally expect rx-filter updates immediately.
But it's another separate issue, we can investigate it when the
work in Libvirt side is done.
Michael S. Tsirkin: tweaked to enable events on start
Michael S. Tsirkin: fixed not to crash when no id
Michael S. Tsirkin: fold in patch:
"additional fixes for mac-programming feature"
Amos Kong: always notify QMP client if mactable is changed
Amos Kong: return NULL list if no net client supports rx-filter query
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2013-06-14 15:45:52 +08:00
|
|
|
nc = qemu_get_queue(n->nic);
|
|
|
|
nc->rxfilter_notify_enabled = 1;
|
|
|
|
|
2020-09-25 23:13:33 +08:00
|
|
|
if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
|
|
|
|
struct virtio_net_config netcfg = {};
|
|
|
|
memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
|
|
|
|
vhost_net_set_config(get_vhost_net(nc->peer),
|
|
|
|
(uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
|
|
|
|
}
|
virtio-net: support RSC v4/v6 tcp traffic for Windows HCK
This commit adds implementation of RX packets
coalescing, compatible with requirements of Windows
Hardware compatibility kit.
The device enables feature VIRTIO_NET_F_RSC_EXT in
host features if it supports extended RSC functionality
as defined in the specification.
This feature requires at least one of VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6. Windows guest driver acks
this feature only if VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
is also present.
If the guest driver acks VIRTIO_NET_F_RSC_EXT feature,
the device coalesces TCPv4 and TCPv6 packets (if
respective VIRTIO_NET_F_GUEST_TSO feature is on,
populates extended RSC information in virtio header
and sets VIRTIO_NET_HDR_F_RSC_INFO bit in header flags.
The device does not recalculate checksums in the coalesced
packet, so they are not valid.
In this case:
All the data packets in a tcp connection are cached
to a single buffer in every receive interval, and will
be sent out via a timer, the 'virtio_net_rsc_timeout'
controls the interval, this value may impact the
performance and response time of tcp connection,
50000(50us) is an experience value to gain a performance
improvement, since the whql test sends packets every 100us,
so '300000(300us)' passes the test case, it is the default
value as well, tune it via the command line parameter
'rsc_interval' within 'virtio-net-pci' device, for example,
to launch a guest with interval set as '500000':
'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,
guest_rsc_ext=on,rsc_interval=500000'
The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.
'NetRscChain' is used to save the segments of IPv4/6 in a
VirtIONet device.
A new segment becomes a 'Candidate' as well as it passed sanity check,
the main handler of TCP includes TCP window update, duplicated
ACK check and the real data coalescing.
An 'Candidate' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.
Sanity check includes:
1. Incorrect version in IP header
2. An IP options or IP fragment
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.
5. An ECN packet
Even though, there might more cases should be considered such as
ip identification other flags, while it breaks the test because
windows set it to the same even it's not a fragment.
Normally it includes 2 typical ways to handle a TCP control flag,
'bypass' and 'finalize', 'bypass' means should be sent out directly,
while 'finalize' means the packets should also be bypassed, but this
should be done after search for the same connection packets in the
pool and drain all of them out, this is to avoid out of order fragment.
All the 'SYN' packets will be bypassed since this always begins a new
connection, other flags such 'URG/FIN/RST/CWR/ECE' will trigger a
finalization, because this normally happens upon a connection is going
to be closed, an 'URG' packet also finalize current coalescing unit.
Statistics can be used to monitor the basic coalescing status, the
'out of order' and 'out of window' means how many retransmitting packets,
thus describe the performance intuitively.
Difference between ip v4 and v6 processing:
Fragment length in ipv4 header includes itself, while it's not
included for ipv6, thus means ipv6 can carry a real 65535 payload.
Note that main goal of implementing this feature in software
is to create reference setup for certification tests. In such
setups guest migration is not required, so the coalesced packets
not yet delivered to the guest will be lost in case of migration.
Signed-off-by: Wei Xu <wexu@redhat.com>
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-01-03 15:12:29 +02:00
|
|
|
QTAILQ_INIT(&n->rsc_chains);
|
2013-07-30 05:02:48 +02:00
|
|
|
n->qdev = dev;
|
2020-05-08 15:59:29 +03:00
|
|
|
|
|
|
|
net_rx_pkt_init(&n->rx_pkt, false);
|
2021-05-14 14:48:33 +03:00
|
|
|
|
|
|
|
if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
|
|
|
|
virtio_net_load_ebpf(n);
|
|
|
|
}
|
2013-04-11 16:29:57 +02:00
|
|
|
}
|
|
|
|
|
qdev: Unrealize must not fail
Devices may have component devices and buses.
Device realization may fail. Realization is recursive: a device's
realize() method realizes its components, and device_set_realized()
realizes its buses (which should in turn realize the devices on that
bus, except bus_set_realized() doesn't implement that, yet).
When realization of a component or bus fails, we need to roll back:
unrealize everything we realized so far. If any of these unrealizes
failed, the device would be left in an inconsistent state. Must not
happen.
device_set_realized() lets it happen: it ignores errors in the roll
back code starting at label child_realize_fail.
Since realization is recursive, unrealization must be recursive, too.
But how could a partly failed unrealize be rolled back? We'd have to
re-realize, which can fail. This design is fundamentally broken.
device_set_realized() does not roll back at all. Instead, it keeps
unrealizing, ignoring further errors.
It can screw up even for a device with no buses: if the lone
dc->unrealize() fails, it still unregisters vmstate, and calls
listeners' unrealize() callback.
bus_set_realized() does not roll back either. Instead, it stops
unrealizing.
Fortunately, no unrealize method can fail, as we'll see below.
To fix the design error, drop parameter @errp from all the unrealize
methods.
Any unrealize method that uses @errp now needs an update. This leads
us to unrealize() methods that can fail. Merely passing it to another
unrealize method cannot cause failure, though. Here are the ones that
do other things with @errp:
* virtio_serial_device_unrealize()
Fails when qbus_set_hotplug_handler() fails, but still does all the
other work. On failure, the device would stay realized with its
resources completely gone. Oops. Can't happen, because
qbus_set_hotplug_handler() can't actually fail here. Pass
&error_abort to qbus_set_hotplug_handler() instead.
* hw/ppc/spapr_drc.c's unrealize()
Fails when object_property_del() fails, but all the other work is
already done. On failure, the device would stay realized with its
vmstate registration gone. Oops. Can't happen, because
object_property_del() can't actually fail here. Pass &error_abort
to object_property_del() instead.
* spapr_phb_unrealize()
Fails and bails out when remove_drcs() fails, but other work is
already done. On failure, the device would stay realized with some
of its resources gone. Oops. remove_drcs() fails only when
chassis_from_bus()'s object_property_get_uint() fails, and it can't
here. Pass &error_abort to remove_drcs() instead.
Therefore, no unrealize method can fail before this patch.
device_set_realized()'s recursive unrealization via bus uses
object_property_set_bool(). Can't drop @errp there, so pass
&error_abort.
We similarly unrealize with object_property_set_bool() elsewhere,
always ignoring errors. Pass &error_abort instead.
Several unrealize methods no longer handle errors from other unrealize
methods: virtio_9p_device_unrealize(),
virtio_input_device_unrealize(), scsi_qdev_unrealize(), ...
Much of the deleted error handling looks wrong anyway.
One unrealize method no longer ignores such errors:
usb_ehci_pci_exit().
Several realize methods no longer ignore errors when rolling back:
v9fs_device_realize_common(), pci_qdev_unrealize(),
spapr_phb_realize(), usb_qdev_realize(), vfio_ccw_realize(),
virtio_device_realize().
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20200505152926.18877-17-armbru@redhat.com>
2020-05-05 17:29:24 +02:00
|
|
|
/*
 * Unrealize callback of the virtio-net device.
 *
 * Releases what the device holds in the order below; the order of the
 * calls is significant and must be kept.  Unrealize cannot fail, hence
 * there is no error reporting.
 *
 * @dev: the device being unrealized (a VirtIONet).
 */
static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int queue_pairs;
    int idx;

    /* eBPF is only handled when the RSS feature is offered by the host. */
    if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
        virtio_net_unload_ebpf(n);
    }

    /* Clearing the status stops the vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        device_listener_unregister(&n->primary_listener);
        /*
         * The migration state notifier has to be removed as well: if it
         * stayed registered after the card is unplugged, starting a
         * migration would call a notifier that is no longer valid.
         */
        remove_migration_state_change_notifier(&n->migration_state);
    }

    /* Without multiqueue there is exactly one queue to delete. */
    if (n->multiqueue) {
        queue_pairs = n->max_queues;
    } else {
        queue_pairs = 1;
    }

    for (idx = 0; idx < queue_pairs; idx++) {
        virtio_net_del_queue(n, idx);
    }

    /* The control vq is deleted too; it sits at index queue_pairs * 2. */
    virtio_del_queue(vdev, queue_pairs * 2);

    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}
|
|
|
|
|
|
|
|
/*
 * Instance initializer for the virtio-net type.
 *
 * Sets the default config size, adds the "bootindex" property and
 * initializes the eBPF RSS state embedded in the device.
 *
 * @obj: the freshly created object (a VirtIONet).
 */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *vnet = VIRTIO_NET(obj);

    /*
     * config_size defaults to sizeof(struct virtio_net_config); it can be
     * overridden with virtio_net_set_config_size.
     */
    vnet->config_size = sizeof(struct virtio_net_config);

    device_add_bootindex_property(obj, &vnet->nic_conf.bootindex, "bootindex",
                                  "/ethernet-phy@0", DEVICE(vnet));

    ebpf_rss_init(&vnet->ebpf_rss);
}
|
|
|
|
|
2017-09-25 12:29:12 +01:00
|
|
|
static int virtio_net_pre_save(void *opaque)
|
2016-10-06 14:55:41 +02:00
|
|
|
{
|
|
|
|
VirtIONet *n = opaque;
|
|
|
|
|
|
|
|
/* At this point, backend must be stopped, otherwise
|
|
|
|
* it might keep writing to memory. */
|
|
|
|
assert(!n->vhost_started);
|
2017-09-25 12:29:12 +01:00
|
|
|
|
|
|
|
return 0;
|
2016-10-06 14:55:41 +02:00
|
|
|
}
|
|
|
|
|
2019-10-29 12:49:04 +01:00
|
|
|
static bool primary_unplug_pending(void *opaque)
|
|
|
|
{
|
|
|
|
DeviceState *dev = opaque;
|
2020-11-18 09:37:48 +01:00
|
|
|
DeviceState *primary;
|
2019-10-29 12:49:04 +01:00
|
|
|
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
|
|
|
|
VirtIONet *n = VIRTIO_NET(vdev);
|
|
|
|
|
2019-11-20 16:49:48 +01:00
|
|
|
if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
|
|
|
|
return false;
|
|
|
|
}
|
2020-11-18 09:37:48 +01:00
|
|
|
primary = failover_find_primary_device(n);
|
|
|
|
return primary ? primary->pending_deleted_event : false;
|
2019-10-29 12:49:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static bool dev_unplug_pending(void *opaque)
|
|
|
|
{
|
|
|
|
DeviceState *dev = opaque;
|
|
|
|
VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
|
|
|
|
|
|
|
|
return vdc->primary_unplug_pending(dev);
|
|
|
|
}
|
|
|
|
|
2016-10-06 14:55:41 +02:00
|
|
|
static const VMStateDescription vmstate_virtio_net = {
|
|
|
|
.name = "virtio-net",
|
|
|
|
.minimum_version_id = VIRTIO_NET_VM_VERSION,
|
|
|
|
.version_id = VIRTIO_NET_VM_VERSION,
|
|
|
|
.fields = (VMStateField[]) {
|
|
|
|
VMSTATE_VIRTIO_DEVICE,
|
|
|
|
VMSTATE_END_OF_LIST()
|
|
|
|
},
|
|
|
|
.pre_save = virtio_net_pre_save,
|
2019-10-29 12:49:04 +01:00
|
|
|
.dev_unplug_pending = dev_unplug_pending,
|
2016-10-06 14:55:41 +02:00
|
|
|
};
|
2016-07-14 18:22:50 +01:00
|
|
|
|
2013-04-11 16:29:57 +02:00
|
|
|
static Property virtio_net_properties[] = {
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
|
|
|
|
VIRTIO_NET_F_CSUM, true),
|
|
|
|
DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_GUEST_CSUM, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
|
|
|
|
DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_GUEST_TSO4, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_GUEST_TSO6, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_GUEST_ECN, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_GUEST_UFO, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_GUEST_ANNOUNCE, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_HOST_TSO4, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_HOST_TSO6, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_HOST_ECN, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_HOST_UFO, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_MRG_RXBUF, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("status", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_STATUS, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_CTRL_VQ, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_CTRL_RX, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_CTRL_VLAN, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_CTRL_RX_EXTRA, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_CTRL_MAC_ADDR, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
|
2015-06-10 23:04:30 +08:00
|
|
|
VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
|
2018-03-07 22:25:40 -05:00
|
|
|
DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
|
2020-05-08 15:59:28 +03:00
|
|
|
DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
|
|
|
|
VIRTIO_NET_F_RSS, false),
|
2020-05-08 15:59:31 +03:00
|
|
|
DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
|
|
|
|
VIRTIO_NET_F_HASH_REPORT, false),
|
virtio-net: support RSC v4/v6 tcp traffic for Windows HCK
This commit adds implementation of RX packets
coalescing, compatible with requirements of Windows
Hardware compatibility kit.
The device enables feature VIRTIO_NET_F_RSC_EXT in
host features if it supports extended RSC functionality
as defined in the specification.
This feature requires at least one of VIRTIO_NET_F_GUEST_TSO4,
VIRTIO_NET_F_GUEST_TSO6. Windows guest driver acks
this feature only if VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
is also present.
If the guest driver acks VIRTIO_NET_F_RSC_EXT feature,
the device coalesces TCPv4 and TCPv6 packets (if
respective VIRTIO_NET_F_GUEST_TSO feature is on,
populates extended RSC information in virtio header
and sets VIRTIO_NET_HDR_F_RSC_INFO bit in header flags.
The device does not recalculate checksums in the coalesced
packet, so they are not valid.
In this case:
All the data packets in a tcp connection are cached
to a single buffer in every receive interval, and will
be sent out via a timer, the 'virtio_net_rsc_timeout'
controls the interval, this value may impact the
performance and response time of tcp connection,
50000(50us) is an experience value to gain a performance
improvement, since the whql test sends packets every 100us,
so '300000(300us)' passes the test case, it is the default
value as well, tune it via the command line parameter
'rsc_interval' within 'virtio-net-pci' device, for example,
to launch a guest with interval set as '500000':
'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,
guest_rsc_ext=on,rsc_interval=500000'
The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.
'NetRscChain' is used to save the segments of IPv4/6 in a
VirtIONet device.
A new segment becomes a 'Candidate' as well as it passed sanity check,
the main handler of TCP includes TCP window update, duplicated
ACK check and the real data coalescing.
An 'Candidate' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.
Sanity check includes:
1. Incorrect version in IP header
2. An IP options or IP fragment
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.
5. An ECN packet
Even though, there might more cases should be considered such as
ip identification other flags, while it breaks the test because
windows set it to the same even it's not a fragment.
Normally it includes 2 typical ways to handle a TCP control flag,
'bypass' and 'finalize', 'bypass' means should be sent out directly,
while 'finalize' means the packets should also be bypassed, but this
should be done after search for the same connection packets in the
pool and drain all of them out, this is to avoid out of order fragment.
All the 'SYN' packets will be bypassed since this always begin a new'
connection, other flags such 'URG/FIN/RST/CWR/ECE' will trigger a
finalization, because this normally happens upon a connection is going
to be closed, an 'URG' packet also finalize current coalescing unit.
Statistics can be used to monitor the basic coalescing status, the
'out of order' and 'out of window' means how many retransmitting packets,
thus describe the performance intuitively.
Difference between ip v4 and v6 processing:
Fragment length in ipv4 header includes itself, while it's not
included for ipv6, thus means ipv6 can carry a real 65535 payload.
Note that main goal of implementing this feature in software
is to create reference setup for certification tests. In such
setups guest migration is not required, so the coalesced packets
not yet delivered to the guest will be lost in case of migration.
Signed-off-by: Wei Xu <wexu@redhat.com>
Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2019-01-03 15:12:29 +02:00
|
|
|
DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
|
|
|
|
VIRTIO_NET_F_RSC_EXT, false),
|
|
|
|
DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
|
|
|
|
VIRTIO_NET_RSC_DEFAULT_INTERVAL),
|
2013-04-11 16:29:57 +02:00
|
|
|
DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
|
|
|
|
DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
|
2015-06-10 23:04:30 +08:00
|
|
|
TX_TIMER_INTERVAL),
|
2013-04-11 16:29:57 +02:00
|
|
|
DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
|
|
|
|
DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
|
2016-08-10 17:47:16 +03:00
|
|
|
DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
|
|
|
|
VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
|
2017-06-28 10:37:59 +08:00
|
|
|
DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
|
|
|
|
VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
|
2016-12-10 16:30:38 +01:00
|
|
|
DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
|
2017-05-23 14:31:19 +02:00
|
|
|
DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
|
|
|
|
true),
|
2018-03-07 22:25:41 -05:00
|
|
|
DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
|
|
|
|
DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
|
2019-10-29 12:49:04 +01:00
|
|
|
DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
|
2013-04-11 16:29:57 +02:00
|
|
|
DEFINE_PROP_END_OF_LIST(),
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Class initializer for the virtio-net type: installs the property list,
 * the migration descriptions and the VirtioDeviceClass callbacks.
 *
 * @klass: the class being initialized.
 * @data:  unused.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dev_class = DEVICE_CLASS(klass);
    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);

    /* Generic device class setup. */
    device_class_set_props(dev_class, virtio_net_properties);
    dev_class->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dev_class->categories);

    /* Device lifecycle. */
    virtio_class->realize = virtio_net_device_realize;
    virtio_class->unrealize = virtio_net_device_unrealize;

    /* Config space and feature negotiation. */
    virtio_class->get_config = virtio_net_get_config;
    virtio_class->set_config = virtio_net_set_config;
    virtio_class->get_features = virtio_net_get_features;
    virtio_class->set_features = virtio_net_set_features;
    virtio_class->bad_features = virtio_net_bad_features;

    /* Reset, status and guest notifiers. */
    virtio_class->reset = virtio_net_reset;
    virtio_class->set_status = virtio_net_set_status;
    virtio_class->guest_notifier_mask = virtio_net_guest_notifier_mask;
    virtio_class->guest_notifier_pending = virtio_net_guest_notifier_pending;

    virtio_class->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);

    /*
     * Migration: guest offloads are initialized from the post load hook so
     * that offloads disabled by the guest are not reset when the features
     * are restored.
     */
    virtio_class->post_load = virtio_net_post_load_virtio;
    virtio_class->vmsd = &vmstate_virtio_net_device;

    virtio_class->primary_unplug_pending = primary_unplug_pending;
}
|
|
|
|
|
|
|
|
static const TypeInfo virtio_net_info = {
|
|
|
|
.name = TYPE_VIRTIO_NET,
|
|
|
|
.parent = TYPE_VIRTIO_DEVICE,
|
|
|
|
.instance_size = sizeof(VirtIONet),
|
|
|
|
.instance_init = virtio_net_instance_init,
|
|
|
|
.class_init = virtio_net_class_init,
|
|
|
|
};
|
|
|
|
|
|
|
|
static void virtio_register_types(void)
|
|
|
|
{
|
|
|
|
type_register_static(&virtio_net_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
type_init(virtio_register_types)
|