-----BEGIN PGP SIGNATURE-----

Version: GnuPG v1
 
 iQEcBAABAgAGBQJfDwlTAAoJEO8Ells5jWIRQSAIAIXTZAn/Ui+9GpqTNtYRTu+n
 RngmAtkPim7NFz0R6hv3CjvkKcMQHXvj1JsJkwV47ww+LRiKHTh6U6r9V637hhEc
 gI1X1mLOUWcHe1Sj1hqvLUoLnPsnjoigShGbILFTRSInMYiuPbw7xihSyw+MPREK
 yheEHztm7DdlnPHp1wCqFJkxYAQMwpThJUwQHbqoGNiYDGZZvfMaigi7bBmOgloz
 i3aRc/J7skfK9GOwVXwqbDoHeWRk5No8y/sEXXUZva7fFol8Unfvw5ubSuQY6Nw0
 fOB+C4N9o8lz9mIrbPkVqbZ3U+/+XIGUt2/JmOqEL6hhXMedh2261WjhC1K4cT8=
 =UURQ
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging

# gpg: Signature made Wed 15 Jul 2020 14:49:07 BST
# gpg:                using RSA key EF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* remotes/jasowang/tags/net-pull-request:
  ftgmac100: fix dblac write test
  net: detect errors from probing vnet hdr flag for TAP devices
  net: check if the file descriptor is valid before using it
  qemu-options.hx: Clean up and fix typo for colo-compare
  net/colo-compare.c: Expose compare "max_queue_size" to users
  hw/net: Added CSO for IPv6
  virtio-net: fix removal of failover device

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2020-07-16 13:12:05 +01:00
commit ee5128bb00
15 changed files with 188 additions and 77 deletions

View File

@ -810,16 +810,18 @@ static void ftgmac100_write(void *opaque, hwaddr addr,
s->phydata = value & 0xffff;
break;
case FTGMAC100_DBLAC: /* DMA Burst Length and Arbitration Control */
if (FTGMAC100_DBLAC_TXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
if (FTGMAC100_DBLAC_TXDES_SIZE(value) < sizeof(FTGMAC100Desc)) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: transmit descriptor too small : %d bytes\n",
__func__, FTGMAC100_DBLAC_TXDES_SIZE(s->dblac));
"%s: transmit descriptor too small: %" PRIx64
" bytes\n", __func__,
FTGMAC100_DBLAC_TXDES_SIZE(value));
break;
}
if (FTGMAC100_DBLAC_RXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) {
if (FTGMAC100_DBLAC_RXDES_SIZE(value) < sizeof(FTGMAC100Desc)) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: receive descriptor too small : %d bytes\n",
__func__, FTGMAC100_DBLAC_RXDES_SIZE(s->dblac));
"%s: receive descriptor too small : %" PRIx64
" bytes\n", __func__,
FTGMAC100_DBLAC_RXDES_SIZE(value));
break;
}
s->dblac = value;

View File

@ -468,8 +468,8 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt)
/* num of iovec without vhdr */
uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1;
uint16_t csl;
struct ip_header *iphdr;
size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;
uint16_t l3_proto = eth_get_l3_proto(iov, 1, iov->iov_len);
/* Put zero to checksum field */
iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
@ -477,9 +477,18 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt)
/* Calculate L4 TCP/UDP checksum */
csl = pkt->payload_len;
csum_cntr = 0;
cso = 0;
/* add pseudo header to csum */
iphdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
csum_cntr = eth_calc_ip4_pseudo_hdr_csum(iphdr, csl, &cso);
if (l3_proto == ETH_P_IP) {
csum_cntr = eth_calc_ip4_pseudo_hdr_csum(
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
csl, &cso);
} else if (l3_proto == ETH_P_IPV6) {
csum_cntr = eth_calc_ip6_pseudo_hdr_csum(
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
csl, pkt->l4proto, &cso);
}
/* data checksum */
csum_cntr +=

View File

@ -3416,6 +3416,7 @@ static void virtio_net_device_unrealize(DeviceState *dev)
g_free(n->vlans);
if (n->failover) {
device_listener_unregister(&n->primary_listener);
g_free(n->primary_device_id);
g_free(n->standby_id);
qobject_unref(n->primary_device_dict);

View File

@ -18,6 +18,7 @@ int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
int socket_set_cork(int fd, int v);
int socket_set_nodelay(int fd);
void qemu_set_block(int fd);
int qemu_try_set_nonblock(int fd);
void qemu_set_nonblock(int fd);
int socket_set_fast_reuse(int fd);

View File

@ -59,6 +59,7 @@ static bool colo_compare_active;
static QemuMutex event_mtx;
static QemuCond event_complete_cond;
static int event_unhandled_count;
static uint32_t max_queue_size;
/*
* + CompareState ++
@ -222,7 +223,7 @@ static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
*/
static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)
{
if (g_queue_get_length(queue) <= MAX_QUEUE_SIZE) {
if (g_queue_get_length(queue) <= max_queue_size) {
if (pkt->ip->ip_p == IPPROTO_TCP) {
fill_pkt_tcp_info(pkt, max_ack);
g_queue_insert_sorted(queue,
@ -1134,6 +1135,37 @@ static void compare_set_expired_scan_cycle(Object *obj, Visitor *v,
s->expired_scan_cycle = value;
}
/*
 * Getter for the "max_queue_size" object property.
 *
 * Copies the file-scope max_queue_size into a local and passes it to
 * the visitor; any visitor error is reported through @errp.
 */
static void get_max_queue_size(Object *obj, Visitor *v,
                               const char *name, void *opaque,
                               Error **errp)
{
    uint32_t current = max_queue_size;

    visit_type_uint32(v, name, &current, errp);
}
/*
 * Setter for the "max_queue_size" object property.
 *
 * Reads a uint32 from the visitor.  A value of zero is rejected with an
 * error; any other value is stored in the file-scope max_queue_size.
 * Whatever error was raised (by the visitor or by the zero check) is
 * propagated to @errp.
 */
static void set_max_queue_size(Object *obj, Visitor *v,
                               const char *name, void *opaque,
                               Error **errp)
{
    Error *err = NULL;
    uint32_t requested;

    visit_type_uint32(v, name, &requested, &err);
    if (!err) {
        if (requested) {
            max_queue_size = requested;
        } else {
            error_setg(&err, "Property '%s.%s' requires a positive value",
                       object_get_typename(obj), name);
        }
    }

    error_propagate(errp, err);
}
static void compare_pri_rs_finalize(SocketReadState *pri_rs)
{
CompareState *s = container_of(pri_rs, CompareState, pri_rs);
@ -1251,6 +1283,11 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
s->expired_scan_cycle = REGULAR_PACKET_CHECK_MS;
}
if (!max_queue_size) {
/* Set default queue size to 1024 */
max_queue_size = MAX_QUEUE_SIZE;
}
if (find_and_check_chardev(&chr, s->pri_indev, errp) ||
!qemu_chr_fe_init(&s->chr_pri_in, chr, errp)) {
return;
@ -1370,6 +1407,10 @@ static void colo_compare_init(Object *obj)
compare_get_expired_scan_cycle,
compare_set_expired_scan_cycle, NULL, NULL);
object_property_add(obj, "max_queue_size", "uint32",
get_max_queue_size,
set_max_queue_size, NULL, NULL);
s->vnet_hdr = false;
object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
compare_set_vnet_hdr);

View File

@ -725,13 +725,18 @@ int net_init_socket(const Netdev *netdev, const char *name,
}
if (sock->has_fd) {
int fd;
int fd, ret;
fd = monitor_fd_param(cur_mon, sock->fd, errp);
if (fd == -1) {
return -1;
}
qemu_set_nonblock(fd);
ret = qemu_try_set_nonblock(fd);
if (ret < 0) {
error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
name, fd);
return -1;
}
if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast,
errp)) {
return -1;

View File

@ -211,7 +211,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
{
}
int tap_probe_vnet_hdr(int fd)
int tap_probe_vnet_hdr(int fd, Error **errp)
{
return 0;
}

View File

@ -147,13 +147,15 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
}
}
int tap_probe_vnet_hdr(int fd)
int tap_probe_vnet_hdr(int fd, Error **errp)
{
struct ifreq ifr;
if (ioctl(fd, TUNGETIFF, &ifr) != 0) {
error_report("TUNGETIFF ioctl() failed: %s", strerror(errno));
return 0;
/* TUNGETIFF is available since kernel v2.6.27 */
error_setg_errno(errp, errno,
"Unable to query TUNGETIFF on FD %d", fd);
return -1;
}
return ifr.ifr_flags & IFF_VNET_HDR;

View File

@ -207,7 +207,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
{
}
int tap_probe_vnet_hdr(int fd)
int tap_probe_vnet_hdr(int fd, Error **errp)
{
return 0;
}

View File

@ -37,7 +37,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
{
}
int tap_probe_vnet_hdr(int fd)
int tap_probe_vnet_hdr(int fd, Error **errp)
{
return 0;
}

View File

@ -598,7 +598,11 @@ int net_init_bridge(const Netdev *netdev, const char *name,
}
qemu_set_nonblock(fd);
vnet_hdr = tap_probe_vnet_hdr(fd);
vnet_hdr = tap_probe_vnet_hdr(fd, errp);
if (vnet_hdr < 0) {
close(fd);
return -1;
}
s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
@ -690,6 +694,8 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
}
if (vhostfdname) {
int ret;
vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err);
if (vhostfd == -1) {
if (tap->has_vhostforce && tap->vhostforce) {
@ -699,7 +705,12 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
}
return;
}
qemu_set_nonblock(vhostfd);
ret = qemu_try_set_nonblock(vhostfd);
if (ret < 0) {
error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
name, fd);
return;
}
} else {
vhostfd = open("/dev/vhost-net", O_RDWR);
if (vhostfd < 0) {
@ -767,6 +778,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
Error *err = NULL;
const char *vhostfdname;
char ifname[128];
int ret = 0;
assert(netdev->type == NET_CLIENT_DRIVER_TAP);
tap = &netdev->u.tap;
@ -795,9 +807,18 @@ int net_init_tap(const Netdev *netdev, const char *name,
return -1;
}
qemu_set_nonblock(fd);
ret = qemu_try_set_nonblock(fd);
if (ret < 0) {
error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
name, fd);
return -1;
}
vnet_hdr = tap_probe_vnet_hdr(fd);
vnet_hdr = tap_probe_vnet_hdr(fd, errp);
if (vnet_hdr < 0) {
close(fd);
return -1;
}
net_init_tap_one(tap, peer, "tap", name, NULL,
script, downscript,
@ -810,7 +831,6 @@ int net_init_tap(const Netdev *netdev, const char *name,
char **fds;
char **vhost_fds;
int nfds = 0, nvhosts = 0;
int ret = 0;
if (tap->has_ifname || tap->has_script || tap->has_downscript ||
tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
@ -842,11 +862,19 @@ int net_init_tap(const Netdev *netdev, const char *name,
goto free_fail;
}
qemu_set_nonblock(fd);
ret = qemu_try_set_nonblock(fd);
if (ret < 0) {
error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
name, fd);
goto free_fail;
}
if (i == 0) {
vnet_hdr = tap_probe_vnet_hdr(fd);
} else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
vnet_hdr = tap_probe_vnet_hdr(fd, errp);
if (vnet_hdr < 0) {
goto free_fail;
}
} else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
error_setg(errp,
"vnet_hdr not consistent across given tap fds");
ret = -1;
@ -891,7 +919,11 @@ free_fail:
}
qemu_set_nonblock(fd);
vnet_hdr = tap_probe_vnet_hdr(fd);
vnet_hdr = tap_probe_vnet_hdr(fd, errp);
if (vnet_hdr < 0) {
close(fd);
return -1;
}
net_init_tap_one(tap, peer, "bridge", name, ifname,
script, downscript, vhostfdname,

View File

@ -34,7 +34,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp);
int tap_probe_vnet_hdr(int fd);
int tap_probe_vnet_hdr(int fd, Error **errp);
int tap_probe_vnet_hdr_len(int fd, int len);
int tap_probe_has_ufo(int fd);
void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);

View File

@ -4695,24 +4695,25 @@ SRST
stored. The file format is libpcap, so it can be analyzed with
tools such as tcpdump or Wireshark.
``-object colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id][,compare_timeout=@var{ms}][,expired_scan_cycle=@var{ms}``
Colo-compare gets packet from primary\_inchardevid and
secondary\_inchardevid, than compare primary packet with
secondary packet. If the packets are same, we will output
primary packet to outdevchardevid, else we will notify
colo-frame do checkpoint and send primary packet to
outdevchardevid. In order to improve efficiency, we need to put
the task of comparison in another thread. If it has the
vnet\_hdr\_support flag, colo compare will send/recv packet with
vnet\_hdr\_len. Then compare\_timeout=@var{ms} determines the
maximum delay colo-compare wait for the packet.
The expired\_scan\_cycle=@var{ms} to set the period of scanning
expired primary node network packets.
If you want to use Xen COLO, will need the notify\_dev to
``-object colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id][,compare_timeout=@var{ms}][,expired_scan_cycle=@var{ms}][,max_queue_size=@var{size}]``
Colo-compare gets packets from the primary\_in chardevid and the
secondary\_in chardevid, then compares whether the payloads of the
primary packet and the secondary packet are the same. If they are, it
outputs the primary packet to outdev; otherwise it notifies the
COLO framework to do a checkpoint and sends the primary packet to
outdev. To improve efficiency, the comparison task needs to run in
another iothread. If the vnet\_hdr\_support flag is set,
colo-compare sends and receives packets with vnet\_hdr\_len.
The compare\_timeout=@var{ms} option determines the maximum time
that colo-compare holds a packet. The expired\_scan\_cycle=@var{ms}
option sets the period for scanning expired primary node network packets.
The max\_queue\_size=@var{size} option sets the maximum compare queue
size, which depends on the user's environment.
If you want to use Xen COLO, you need to add notify\_dev to
notify the Xen colo-frame to do a checkpoint.
we must use it with the help of filter-mirror and
filter-redirector.
COLO-compare must be used with the help of filter-mirror,
filter-redirector and filter-rewriter.
::

View File

@ -260,25 +260,35 @@ void qemu_set_block(int fd)
assert(f != -1);
}
void qemu_set_nonblock(int fd)
int qemu_try_set_nonblock(int fd)
{
int f;
f = fcntl(fd, F_GETFL);
assert(f != -1);
f = fcntl(fd, F_SETFL, f | O_NONBLOCK);
#ifdef __OpenBSD__
if (f == -1) {
return -errno;
}
if (fcntl(fd, F_SETFL, f | O_NONBLOCK) == -1) {
#ifdef __OpenBSD__
/*
* Previous to OpenBSD 6.3, fcntl(F_SETFL) is not permitted on
* memory devices and sets errno to ENODEV.
* It's OK if we fail to set O_NONBLOCK on devices like /dev/null,
* because they will never block anyway.
*/
assert(errno == ENODEV);
}
#else
assert(f != -1);
if (errno == ENODEV) {
return 0;
}
#endif
return -errno;
}
return 0;
}
/*
 * Put @fd into non-blocking mode.
 *
 * Thin wrapper around qemu_try_set_nonblock() for callers that cannot
 * handle failure: a non-zero result trips the assertion.
 */
void qemu_set_nonblock(int fd)
{
    int rc = qemu_try_set_nonblock(fd);

    assert(rc == 0);
}
int socket_set_fast_reuse(int fd)

View File

@ -132,31 +132,6 @@ struct tm *localtime_r(const time_t *timep, struct tm *result)
}
#endif /* CONFIG_LOCALTIME_R */
/*
 * Switch the socket @fd to blocking mode.
 *
 * WSAEventSelect() is called first with a NULL event object and no
 * network events, then FIONBIO is written with 0.  Return values of
 * both calls are ignored.
 */
void qemu_set_block(int fd)
{
    unsigned long nonblocking = 0;

    WSAEventSelect(fd, NULL, 0);
    ioctlsocket(fd, FIONBIO, &nonblocking);
}
/*
 * Switch the socket @fd to non-blocking mode and register it with
 * qemu_fd_register().  The ioctlsocket() result is not checked.
 */
void qemu_set_nonblock(int fd)
{
    unsigned long enable = 1;

    ioctlsocket(fd, FIONBIO, &enable);
    qemu_fd_register(fd);
}
int socket_set_fast_reuse(int fd)
{
    /*
     * Intentionally a no-op.  Reusing an endpoint whose previous socket is
     * still in TIME_WAIT state is usually enabled by setting SO_REUSEADDR.
     * On Windows fast reuse is the default and SO_REUSEADDR does strange
     * things, so there is nothing to do here.  More info can be found at:
     * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx
     */
    (void)fd;
    return 0;
}
static int socket_error(void)
{
switch (WSAGetLastError()) {
@ -233,6 +208,38 @@ static int socket_error(void)
}
}
/*
 * Switch the socket @fd to blocking mode.
 *
 * WSAEventSelect() is called first with a NULL event object and no
 * network events, then FIONBIO is written with 0.  Return values of
 * both calls are ignored.
 */
void qemu_set_block(int fd)
{
    unsigned long nonblocking = 0;

    WSAEventSelect(fd, NULL, 0);
    ioctlsocket(fd, FIONBIO, &nonblocking);
}
/*
 * Try to switch the socket @fd to non-blocking mode.
 *
 * Returns 0 on success, after registering @fd with qemu_fd_register().
 * If ioctlsocket() fails, returns the negated value of socket_error()
 * and does not register the descriptor.
 */
int qemu_try_set_nonblock(int fd)
{
    unsigned long enable = 1;
    int rc = ioctlsocket(fd, FIONBIO, &enable);

    if (rc != NO_ERROR) {
        return -socket_error();
    }

    qemu_fd_register(fd);
    return 0;
}
/*
 * Switch @fd to non-blocking mode via qemu_try_set_nonblock().
 * The result is explicitly discarded, so a failure is not reported
 * to the caller.
 */
void qemu_set_nonblock(int fd)
{
(void)qemu_try_set_nonblock(fd);
}
int socket_set_fast_reuse(int fd)
{
    /*
     * Intentionally a no-op.  Reusing an endpoint whose previous socket is
     * still in TIME_WAIT state is usually enabled by setting SO_REUSEADDR.
     * On Windows fast reuse is the default and SO_REUSEADDR does strange
     * things, so there is nothing to do here.  More info can be found at:
     * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx
     */
    (void)fd;
    return 0;
}
int inet_aton(const char *cp, struct in_addr *ia)
{
uint32_t addr = inet_addr(cp);