diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c index 043ba61b86..5f4b26fc5f 100644 --- a/hw/net/ftgmac100.c +++ b/hw/net/ftgmac100.c @@ -810,16 +810,18 @@ static void ftgmac100_write(void *opaque, hwaddr addr, s->phydata = value & 0xffff; break; case FTGMAC100_DBLAC: /* DMA Burst Length and Arbitration Control */ - if (FTGMAC100_DBLAC_TXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) { + if (FTGMAC100_DBLAC_TXDES_SIZE(value) < sizeof(FTGMAC100Desc)) { qemu_log_mask(LOG_GUEST_ERROR, - "%s: transmit descriptor too small : %d bytes\n", - __func__, FTGMAC100_DBLAC_TXDES_SIZE(s->dblac)); + "%s: transmit descriptor too small: %" PRIu64 + " bytes\n", __func__, + FTGMAC100_DBLAC_TXDES_SIZE(value)); break; } - if (FTGMAC100_DBLAC_RXDES_SIZE(s->dblac) < sizeof(FTGMAC100Desc)) { + if (FTGMAC100_DBLAC_RXDES_SIZE(value) < sizeof(FTGMAC100Desc)) { qemu_log_mask(LOG_GUEST_ERROR, - "%s: receive descriptor too small : %d bytes\n", - __func__, FTGMAC100_DBLAC_RXDES_SIZE(s->dblac)); + "%s: receive descriptor too small: %" PRIu64 + " bytes\n", __func__, + FTGMAC100_DBLAC_RXDES_SIZE(value)); break; } s->dblac = value; diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c index 162f802dd7..331c73cfc0 100644 --- a/hw/net/net_tx_pkt.c +++ b/hw/net/net_tx_pkt.c @@ -468,8 +468,8 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) /* num of iovec without vhdr */ uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1; uint16_t csl; - struct ip_header *iphdr; size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset; + uint16_t l3_proto = eth_get_l3_proto(iov, 1, iov->iov_len); /* Put zero to checksum field */ iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum); @@ -477,9 +477,18 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) /* Calculate L4 TCP/UDP checksum */ csl = pkt->payload_len; + csum_cntr = 0; + cso = 0; /* add pseudo header to csum */ - iphdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; - csum_cntr = 
eth_calc_ip4_pseudo_hdr_csum(iphdr, csl, &cso); + if (l3_proto == ETH_P_IP) { + csum_cntr = eth_calc_ip4_pseudo_hdr_csum( + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base, + csl, &cso); + } else if (l3_proto == ETH_P_IPV6) { + csum_cntr = eth_calc_ip6_pseudo_hdr_csum( + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base, + csl, pkt->l4proto, &cso); + } /* data checksum */ csum_cntr += diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 10cc958396..4895af1cbe 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3416,6 +3416,7 @@ static void virtio_net_device_unrealize(DeviceState *dev) g_free(n->vlans); if (n->failover) { + device_listener_unregister(&n->primary_listener); g_free(n->primary_device_id); g_free(n->standby_id); qobject_unref(n->primary_device_dict); diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 57cd049d6e..7d1f813576 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -18,6 +18,7 @@ int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen); int socket_set_cork(int fd, int v); int socket_set_nodelay(int fd); void qemu_set_block(int fd); +int qemu_try_set_nonblock(int fd); void qemu_set_nonblock(int fd); int socket_set_fast_reuse(int fd); diff --git a/net/colo-compare.c b/net/colo-compare.c index 398b7546ff..cc15f23dea 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -59,6 +59,7 @@ static bool colo_compare_active; static QemuMutex event_mtx; static QemuCond event_complete_cond; static int event_unhandled_count; +static uint32_t max_queue_size; /* * + CompareState ++ @@ -222,7 +223,7 @@ static void fill_pkt_tcp_info(void *data, uint32_t *max_ack) */ static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack) { - if (g_queue_get_length(queue) <= MAX_QUEUE_SIZE) { + if (g_queue_get_length(queue) <= max_queue_size) { if (pkt->ip->ip_p == IPPROTO_TCP) { fill_pkt_tcp_info(pkt, max_ack); g_queue_insert_sorted(queue, @@ -1134,6 +1135,37 @@ static void 
compare_set_expired_scan_cycle(Object *obj, Visitor *v, s->expired_scan_cycle = value; } +static void get_max_queue_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + uint32_t value = max_queue_size; + + visit_type_uint32(v, name, &value, errp); +} + +static void set_max_queue_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, name, &value, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' requires a positive value", + object_get_typename(obj), name); + goto out; + } + max_queue_size = value; + +out: + error_propagate(errp, local_err); +} + static void compare_pri_rs_finalize(SocketReadState *pri_rs) { CompareState *s = container_of(pri_rs, CompareState, pri_rs); @@ -1251,6 +1283,11 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) s->expired_scan_cycle = REGULAR_PACKET_CHECK_MS; } + if (!max_queue_size) { + /* Set default queue size to 1024 */ + max_queue_size = MAX_QUEUE_SIZE; + } + if (find_and_check_chardev(&chr, s->pri_indev, errp) || !qemu_chr_fe_init(&s->chr_pri_in, chr, errp)) { return; @@ -1370,6 +1407,10 @@ static void colo_compare_init(Object *obj) compare_get_expired_scan_cycle, compare_set_expired_scan_cycle, NULL, NULL); + object_property_add(obj, "max_queue_size", "uint32", + get_max_queue_size, + set_max_queue_size, NULL, NULL); + s->vnet_hdr = false; object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr, compare_set_vnet_hdr); diff --git a/net/socket.c b/net/socket.c index c92354049b..2d21fddd9c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -725,13 +725,18 @@ int net_init_socket(const Netdev *netdev, const char *name, } if (sock->has_fd) { - int fd; + int fd, ret; fd = monitor_fd_param(cur_mon, sock->fd, errp); if (fd == -1) { return -1; } - qemu_set_nonblock(fd); + ret = qemu_try_set_nonblock(fd); + if (ret < 0) { + 
error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", + name, fd); + return -1; + } if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast, errp)) { return -1; diff --git a/net/tap-bsd.c b/net/tap-bsd.c index a5c3707f80..77aaf674b1 100644 --- a/net/tap-bsd.c +++ b/net/tap-bsd.c @@ -211,7 +211,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) { } -int tap_probe_vnet_hdr(int fd) +int tap_probe_vnet_hdr(int fd, Error **errp) { return 0; } diff --git a/net/tap-linux.c b/net/tap-linux.c index e0dd442ee3..b0635e9e32 100644 --- a/net/tap-linux.c +++ b/net/tap-linux.c @@ -147,13 +147,15 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) } } -int tap_probe_vnet_hdr(int fd) +int tap_probe_vnet_hdr(int fd, Error **errp) { struct ifreq ifr; if (ioctl(fd, TUNGETIFF, &ifr) != 0) { - error_report("TUNGETIFF ioctl() failed: %s", strerror(errno)); - return 0; + /* TUNGETIFF is available since kernel v2.6.27 */ + error_setg_errno(errp, errno, + "Unable to query TUNGETIFF on FD %d", fd); + return -1; } return ifr.ifr_flags & IFF_VNET_HDR; diff --git a/net/tap-solaris.c b/net/tap-solaris.c index d03165c57c..0475a58207 100644 --- a/net/tap-solaris.c +++ b/net/tap-solaris.c @@ -207,7 +207,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) { } -int tap_probe_vnet_hdr(int fd) +int tap_probe_vnet_hdr(int fd, Error **errp) { return 0; } diff --git a/net/tap-stub.c b/net/tap-stub.c index a9ab8f8293..de525a2e69 100644 --- a/net/tap-stub.c +++ b/net/tap-stub.c @@ -37,7 +37,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) { } -int tap_probe_vnet_hdr(int fd) +int tap_probe_vnet_hdr(int fd, Error **errp) { return 0; } diff --git a/net/tap.c b/net/tap.c index f9dcc2ef51..14dc904fca 100644 --- a/net/tap.c +++ b/net/tap.c @@ -598,7 +598,11 @@ int net_init_bridge(const Netdev *netdev, const char *name, } qemu_set_nonblock(fd); - vnet_hdr = tap_probe_vnet_hdr(fd); + vnet_hdr = 
tap_probe_vnet_hdr(fd, errp); + if (vnet_hdr < 0) { + close(fd); + return -1; + } s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper, @@ -690,6 +694,8 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, } if (vhostfdname) { + int ret; + vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err); if (vhostfd == -1) { if (tap->has_vhostforce && tap->vhostforce) { @@ -699,7 +705,12 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, } return; } - qemu_set_nonblock(vhostfd); + ret = qemu_try_set_nonblock(vhostfd); + if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", + name, vhostfd); + return; + } } else { vhostfd = open("/dev/vhost-net", O_RDWR); if (vhostfd < 0) { @@ -767,6 +778,7 @@ int net_init_tap(const Netdev *netdev, const char *name, Error *err = NULL; const char *vhostfdname; char ifname[128]; + int ret = 0; assert(netdev->type == NET_CLIENT_DRIVER_TAP); tap = &netdev->u.tap; @@ -795,9 +807,18 @@ int net_init_tap(const Netdev *netdev, const char *name, return -1; } - qemu_set_nonblock(fd); + ret = qemu_try_set_nonblock(fd); + if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", + name, fd); + return -1; + } - vnet_hdr = tap_probe_vnet_hdr(fd); + vnet_hdr = tap_probe_vnet_hdr(fd, errp); + if (vnet_hdr < 0) { + close(fd); + return -1; + } net_init_tap_one(tap, peer, "tap", name, NULL, script, downscript, @@ -810,7 +831,6 @@ int net_init_tap(const Netdev *netdev, const char *name, char **fds; char **vhost_fds; int nfds = 0, nvhosts = 0; - int ret = 0; if (tap->has_ifname || tap->has_script || tap->has_downscript || tap->has_vnet_hdr || tap->has_helper || tap->has_queues || @@ -842,11 +862,20 @@ int net_init_tap(const Netdev *netdev, const char *name, goto free_fail; } - qemu_set_nonblock(fd); + ret = qemu_try_set_nonblock(fd); + if (ret < 0) { + error_setg_errno(errp, 
-ret, "%s: Can't use file descriptor %d", + name, fd); + goto free_fail; + } if (i == 0) { - vnet_hdr = tap_probe_vnet_hdr(fd); - } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) { + vnet_hdr = tap_probe_vnet_hdr(fd, errp); + if (vnet_hdr < 0) { + ret = -1; /* probe failed and errp is set */ + goto free_fail; + } + } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) { error_setg(errp, "vnet_hdr not consistent across given tap fds"); ret = -1; @@ -891,7 +920,11 @@ free_fail: } qemu_set_nonblock(fd); - vnet_hdr = tap_probe_vnet_hdr(fd); + vnet_hdr = tap_probe_vnet_hdr(fd, errp); + if (vnet_hdr < 0) { + close(fd); + return -1; + } net_init_tap_one(tap, peer, "bridge", name, ifname, script, downscript, vhostfdname, diff --git a/net/tap_int.h b/net/tap_int.h index e3194b23f4..225a49ea48 100644 --- a/net/tap_int.h +++ b/net/tap_int.h @@ -34,7 +34,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen); void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp); -int tap_probe_vnet_hdr(int fd); +int tap_probe_vnet_hdr(int fd, Error **errp); int tap_probe_vnet_hdr_len(int fd, int len); int tap_probe_has_ufo(int fd); void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo); diff --git a/qemu-options.hx b/qemu-options.hx index d2c1e95bcf..65147ad971 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4695,24 +4695,25 @@ SRST stored. The file format is libpcap, so it can be analyzed with tools such as tcpdump or Wireshark. 
- ``-object colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id][,compare_timeout=@var{ms}][,expired_scan_cycle=@var{ms}`` - Colo-compare gets packet from primary\_inchardevid and - secondary\_inchardevid, than compare primary packet with - secondary packet. If the packets are same, we will output - primary packet to outdevchardevid, else we will notify - colo-frame do checkpoint and send primary packet to - outdevchardevid. 
In order to improve efficiency, we need to put - the task of comparison in another thread. If it has the - vnet\_hdr\_support flag, colo compare will send/recv packet with - vnet\_hdr\_len. Then compare\_timeout=@var{ms} determines the - maximum delay colo-compare wait for the packet. - The expired\_scan\_cycle=@var{ms} to set the period of scanning - expired primary node network packets. - If you want to use Xen COLO, will need the notify\_dev to + ``-object colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id][,compare_timeout=@var{ms}][,expired_scan_cycle=@var{ms}][,max_queue_size=@var{size}]`` + Colo-compare gets packets from the primary\_in and + secondary\_in chardevs, then compares the payload of each primary + packet with that of the secondary packet. If they match, it outputs + the primary packet to outdev; otherwise it notifies the COLO framework + to do a checkpoint and sends the primary packet to outdev. In order to + improve efficiency, we need to put the task of comparison in + another iothread. If it has the vnet\_hdr\_support flag, + colo-compare will send/recv packets with vnet\_hdr\_len. + The compare\_timeout=@var{ms} determines the maximum time + colo-compare holds a packet. The expired\_scan\_cycle=@var{ms} + sets the period of scanning for expired primary node network packets. + The max\_queue\_size=@var{size} sets the maximum compare queue + size, depending on the user's environment. + If you want to use Xen COLO, you need to add the notify\_dev to notify Xen colo-frame to do checkpoint. - we must use it with the help of filter-mirror and - filter-redirector. + COLO-compare must be used with the help of filter-mirror, + filter-redirector and filter-rewriter. 
:: diff --git a/util/oslib-posix.c b/util/oslib-posix.c index e60aea85b6..36bf8593f8 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -260,25 +260,35 @@ void qemu_set_block(int fd) assert(f != -1); } -void qemu_set_nonblock(int fd) +int qemu_try_set_nonblock(int fd) { int f; f = fcntl(fd, F_GETFL); - assert(f != -1); - f = fcntl(fd, F_SETFL, f | O_NONBLOCK); -#ifdef __OpenBSD__ if (f == -1) { + return -errno; + } + if (fcntl(fd, F_SETFL, f | O_NONBLOCK) == -1) { +#ifdef __OpenBSD__ /* * Previous to OpenBSD 6.3, fcntl(F_SETFL) is not permitted on * memory devices and sets errno to ENODEV. * It's OK if we fail to set O_NONBLOCK on devices like /dev/null, * because they will never block anyway. */ - assert(errno == ENODEV); - } -#else - assert(f != -1); + if (errno == ENODEV) { + return 0; + } #endif + return -errno; + } + return 0; +} + +void qemu_set_nonblock(int fd) +{ + int f; + f = qemu_try_set_nonblock(fd); + assert(f == 0); } int socket_set_fast_reuse(int fd) diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 3b49d27297..7eedbe5859 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -132,31 +132,6 @@ struct tm *localtime_r(const time_t *timep, struct tm *result) } #endif /* CONFIG_LOCALTIME_R */ -void qemu_set_block(int fd) -{ - unsigned long opt = 0; - WSAEventSelect(fd, NULL, 0); - ioctlsocket(fd, FIONBIO, &opt); -} - -void qemu_set_nonblock(int fd) -{ - unsigned long opt = 1; - ioctlsocket(fd, FIONBIO, &opt); - qemu_fd_register(fd); -} - -int socket_set_fast_reuse(int fd) -{ - /* Enabling the reuse of an endpoint that was used by a socket still in - * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows - * fast reuse is the default and SO_REUSEADDR does strange things. So we - * don't have to do anything here. 
More info can be found at: - * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ - return 0; -} - - static int socket_error(void) { switch (WSAGetLastError()) { @@ -233,6 +208,38 @@ static int socket_error(void) } } +void qemu_set_block(int fd) +{ + unsigned long opt = 0; + WSAEventSelect(fd, NULL, 0); + ioctlsocket(fd, FIONBIO, &opt); +} + +int qemu_try_set_nonblock(int fd) +{ + unsigned long opt = 1; + if (ioctlsocket(fd, FIONBIO, &opt) != NO_ERROR) { + return -socket_error(); + } + qemu_fd_register(fd); + return 0; +} + +void qemu_set_nonblock(int fd) +{ + (void)qemu_try_set_nonblock(fd); +} + +int socket_set_fast_reuse(int fd) +{ + /* Enabling the reuse of an endpoint that was used by a socket still in + * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows + * fast reuse is the default and SO_REUSEADDR does strange things. So we + * don't have to do anything here. More info can be found at: + * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ + return 0; +} + int inet_aton(const char *cp, struct in_addr *ia) { uint32_t addr = inet_addr(cp);