From df8d07081718c29d04d106583d9c300128686cda Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 20 Oct 2022 11:58:45 +0200 Subject: [PATCH 01/26] virtio-net: fix bottom-half packet TX on asynchronous completion When virtio-net is used with the socket netdev backend, the backend can be busy and not able to collect new packets. In this case, net_socket_receive() returns 0 and registers a poll function to detect when the socket is ready again. In virtio_net_tx_bh(), virtio_net_flush_tx() forwards the 0, the virtio notifications are disabled and the function is not re-scheduled, waiting for the backend to be ready. When the socket netdev backend is again able to send packets, the poll function restarts flushing the remaining packets. This is done by calling virtio_net_tx_complete(). It re-enables notifications and calls virtio_net_flush_tx() again. But it seems that if virtio_net_flush_tx() reaches the tx_burst value, the queue is not completely flushed and no new notification is sent to re-schedule virtio_net_tx_bh(). Nothing restarts the flush of the queue, and the remaining packets are stuck in the queue. To fix that, detect in virtio_net_tx_complete() if virtio_net_flush_tx() has been stopped by tx_burst and if yes re-schedule the bottom half function virtio_net_tx_bh() to flush the remaining packets. This is what is done in virtio_net_tx_bh() when the virtio_net_flush_tx() is synchronous, and completely bypassed when the operation needs to be asynchronous. Fixes: a697a334b3c4 ("virtio-net: Introduce a new bottom half packet TX") Cc: alex.williamson@redhat.com Signed-off-by: Laurent Vivier Reviewed-by: Michael S. Tsirkin Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- hw/net/virtio-net.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index e9f696b4cf..1fbf2f3e19 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -2526,6 +2526,7 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) VirtIONet *n = qemu_get_nic_opaque(nc); VirtIONetQueue *q = virtio_net_get_subqueue(nc); VirtIODevice *vdev = VIRTIO_DEVICE(n); + int ret; virtqueue_push(q->tx_vq, q->async_tx.elem, 0); virtio_notify(vdev, q->tx_vq); @@ -2534,7 +2535,17 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) q->async_tx.elem = NULL; virtio_queue_set_notification(q->tx_vq, 1); - virtio_net_flush_tx(q); + ret = virtio_net_flush_tx(q); + if (q->tx_bh && ret >= n->tx_burst) { + /* + * the flush has been stopped by tx_burst + * we will not receive notification for the + * remainining part, so re-schedule + */ + virtio_queue_set_notification(q->tx_vq, 0); + qemu_bh_schedule(q->tx_bh); + q->tx_waiting = 1; + } } /* TX */ From 7550a82259fcf9ce5f1f6443ced779d0eb8afdca Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 20 Oct 2022 11:58:46 +0200 Subject: [PATCH 02/26] virtio-net: fix TX timer with tx_burst When virtio_net_flush_tx() reaches the tx_burst value all the queue is not flushed and nothing restart the timer. Fix that by doing for TX timer as we do for bottom half TX: rearming the timer if we find any packet to send during the virtio_net_flush_tx() call. Fixes: e3f30488e5f8 ("virtio-net: Limit number of packets sent per TX flush") Cc: alex.williamson@redhat.com Signed-off-by: Laurent Vivier Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- hw/net/virtio-net.c | 50 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 1fbf2f3e19..b6903aea54 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -2536,14 +2536,19 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) virtio_queue_set_notification(q->tx_vq, 1); ret = virtio_net_flush_tx(q); - if (q->tx_bh && ret >= n->tx_burst) { + if (ret >= n->tx_burst) { /* * the flush has been stopped by tx_burst * we will not receive notification for the * remainining part, so re-schedule */ virtio_queue_set_notification(q->tx_vq, 0); - qemu_bh_schedule(q->tx_bh); + if (q->tx_bh) { + qemu_bh_schedule(q->tx_bh); + } else { + timer_mod(q->tx_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); + } q->tx_waiting = 1; } } @@ -2644,6 +2649,8 @@ drop: return num_packets; } +static void virtio_net_tx_timer(void *opaque); + static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) { VirtIONet *n = VIRTIO_NET(vdev); @@ -2661,15 +2668,13 @@ static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) } if (q->tx_waiting) { - virtio_queue_set_notification(vq, 1); + /* We already have queued packets, immediately flush */ timer_del(q->tx_timer); - q->tx_waiting = 0; - if (virtio_net_flush_tx(q) == -EINVAL) { - return; - } + virtio_net_tx_timer(q); } else { + /* re-arm timer to flush it (and more) on next tick */ timer_mod(q->tx_timer, - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); q->tx_waiting = 1; virtio_queue_set_notification(vq, 0); } @@ -2702,6 +2707,8 @@ static void virtio_net_tx_timer(void *opaque) VirtIONetQueue *q = opaque; VirtIONet *n = q->n; VirtIODevice *vdev = VIRTIO_DEVICE(n); + int ret; + /* This happens when device was stopped but BH wasn't. 
*/ if (!vdev->vm_running) { /* Make sure tx waiting is set, so we'll run when restarted. */ @@ -2716,8 +2723,33 @@ static void virtio_net_tx_timer(void *opaque) return; } + ret = virtio_net_flush_tx(q); + if (ret == -EBUSY || ret == -EINVAL) { + return; + } + /* + * If we flush a full burst of packets, assume there are + * more coming and immediately rearm + */ + if (ret >= n->tx_burst) { + q->tx_waiting = 1; + timer_mod(q->tx_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); + return; + } + /* + * If less than a full burst, re-enable notification and flush + * anything that may have come in while we weren't looking. If + * we find something, assume the guest is still active and rearm + */ virtio_queue_set_notification(q->tx_vq, 1); - virtio_net_flush_tx(q); + ret = virtio_net_flush_tx(q); + if (ret > 0) { + virtio_queue_set_notification(q->tx_vq, 0); + q->tx_waiting = 1; + timer_mod(q->tx_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); + } } static void virtio_net_tx_bh(void *opaque) From faa825ddb3f116c2aa4db0ab5ed72bd093f39337 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 20 Oct 2022 10:00:58 +0200 Subject: [PATCH 03/26] vdpa: Delete duplicated vdpa_feature_bits entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This entry was duplicated on referenced commit. Removing it. 
Fixes: 402378407dbd ("vhost-vdpa: multiqueue support") Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Signed-off-by: Jason Wang --- net/vhost-vdpa.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 4bc3fd01a8..eebf29f5c1 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -63,7 +63,6 @@ const int vdpa_feature_bits[] = { VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_RX_EXTRA, VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_CTRL_MAC_ADDR, VIRTIO_NET_F_RSS, VIRTIO_NET_F_MQ, From 6ce262fbe7744a3d82d872e232fd0d39cfba0363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 20 Oct 2022 10:02:30 +0200 Subject: [PATCH 04/26] vdpa: Remove shadow CVQ command check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The guest will see undefined behavior if it issues commands that were not negotiated, but this is somewhat expected. Simplify the code by deleting this check. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Signed-off-by: Jason Wang --- net/vhost-vdpa.c | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index eebf29f5c1..6d64000202 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -461,48 +461,6 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { .check_peer_type = vhost_vdpa_check_peer_type, }; -/** - * Do not forward commands not supported by SVQ. Otherwise, the device could - * accept it and qemu would not know how to update the device model. 
- */ -static bool vhost_vdpa_net_cvq_validate_cmd(const void *out_buf, size_t len) -{ - struct virtio_net_ctrl_hdr ctrl; - - if (unlikely(len < sizeof(ctrl))) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: invalid legnth of out buffer %zu\n", __func__, len); - return false; - } - - memcpy(&ctrl, out_buf, sizeof(ctrl)); - switch (ctrl.class) { - case VIRTIO_NET_CTRL_MAC: - switch (ctrl.cmd) { - case VIRTIO_NET_CTRL_MAC_ADDR_SET: - return true; - default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n", - __func__, ctrl.cmd); - }; - break; - case VIRTIO_NET_CTRL_MQ: - switch (ctrl.cmd) { - case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: - return true; - default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mq cmd %u\n", - __func__, ctrl.cmd); - }; - break; - default: - qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n", - __func__, ctrl.class); - }; - - return false; -} - /** * Validate and copy control virtqueue commands. * @@ -526,16 +484,10 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, .iov_len = sizeof(status), }; ssize_t dev_written = -EINVAL; - bool ok; out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_len()); - ok = vhost_vdpa_net_cvq_validate_cmd(s->cvq_cmd_out_buffer, out.iov_len); - if (unlikely(!ok)) { - goto out; - } - dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); if (unlikely(dev_written < 0)) { goto out; From 8801ccd0500437a86e3d15a806f37ebb84605dce Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Sat, 8 Oct 2022 00:58:58 -0700 Subject: [PATCH 05/26] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to other vhost backends, vhostfd can be passed to vhost-vdpa backend as another parameter to instantiate vhost-vdpa net client. 
This would benefit the use case where only open file descriptors, as opposed to raw vhost-vdpa device paths, are accessible from the QEMU process. (qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1 Signed-off-by: Si-Wei Liu Acked-by: Eugenio Pérez Signed-off-by: Jason Wang --- net/vhost-vdpa.c | 25 ++++++++++++++++++++----- qapi/net.json | 3 +++ qemu-options.hx | 6 ++++-- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 6d64000202..9c1b25d782 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -634,14 +634,29 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); opts = &netdev->u.vhost_vdpa; - if (!opts->vhostdev) { - error_setg(errp, "vdpa character device not specified with vhostdev"); + if (!opts->has_vhostdev && !opts->has_vhostfd) { + error_setg(errp, + "vhost-vdpa: neither vhostdev= nor vhostfd= was specified"); return -1; } - vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp); - if (vdpa_device_fd == -1) { - return -errno; + if (opts->has_vhostdev && opts->has_vhostfd) { + error_setg(errp, + "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive"); + return -1; + } + + if (opts->has_vhostdev) { + vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp); + if (vdpa_device_fd == -1) { + return -errno; + } + } else if (opts->has_vhostfd) { + vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp); + if (vdpa_device_fd == -1) { + error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: "); + return -1; + } } r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); diff --git a/qapi/net.json b/qapi/net.json index dd088c09c5..926ecc8cca 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -442,6 +442,8 @@ # @vhostdev: path of vhost-vdpa device # (default:'/dev/vhost-vdpa-0') # +# @vhostfd: file descriptor of an already opened vhost vdpa device +# # @queues: number of queues to be created for multiqueue 
vhost-vdpa # (default: 1) # @@ -456,6 +458,7 @@ { 'struct': 'NetdevVhostVDPAOptions', 'data': { '*vhostdev': 'str', + '*vhostfd': 'str', '*queues': 'int', '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } } diff --git a/qemu-options.hx b/qemu-options.hx index eb38e5dc40..876a70aa63 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2790,8 +2790,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, " configure a vhost-user network, backed by a chardev 'dev'\n" #endif #ifdef __linux__ - "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n" + "-netdev vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n" " configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n" + " use 'vhostdev=/path/to/dev' to open a vhost vdpa device\n" + " use 'vhostfd=h' to connect to an already opened vhost vdpa device\n" #endif #ifdef CONFIG_VMNET "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n" @@ -3296,7 +3298,7 @@ SRST -netdev type=vhost-user,id=net0,chardev=chr0 \ -device virtio-net-pci,netdev=net0 -``-netdev vhost-vdpa,vhostdev=/path/to/dev`` +``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]`` Establish a vhost-vdpa netdev. vDPA device is a device that uses a datapath which complies with From 7d0e12af59286f3784c3ab7502325fc6a051d117 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Mon, 3 Oct 2022 11:06:12 +0100 Subject: [PATCH 06/26] net: improve error message for missing netdev backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current message when using '-net user...' with SLIRP disabled at compile time is: qemu-system-x86_64: -net user: Parameter 'type' expects a net backend type (maybe it is not compiled into this binary) An observation is that we're using the 'netdev->type' field here which is an enum value, produced after QAPI has converted from its string form. IOW, at this point in the code, we know that the user's specified type name was a valid network backend. 
The only possible scenario that can make the backend init function be NULL, is if support for that backend was disabled at build time. Given this, we don't need to caveat our error message with a 'maybe' hint, we can be totally explicit. The use of QERR_INVALID_PARAMETER_VALUE doesn't really lend itself to user friendly error message text. Since this is not used to set a specific QAPI error class, we can simply stop using this pre-formatted error text and provide something better. Thus the new message is: qemu-system-x86_64: -net user: network backend 'user' is not compiled into this binary The case of passing 'hubport' for -net is also given a message reminding people they should have used -netdev/-nic instead, as this backend type is only valid for the modern syntax. Reviewed-by: Marc-André Lureau Reviewed-by: Thomas Huth Signed-off-by: Daniel P. Berrangé Signed-off-by: Jason Wang --- net/net.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/net/net.c b/net/net.c index 2db160e063..8ddafacf13 100644 --- a/net/net.c +++ b/net/net.c @@ -1036,19 +1036,23 @@ static int net_client_init1(const Netdev *netdev, bool is_netdev, Error **errp) if (is_netdev) { if (netdev->type == NET_CLIENT_DRIVER_NIC || !net_client_init_fun[netdev->type]) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", - "a netdev backend type"); + error_setg(errp, "network backend '%s' is not compiled into this binary", + NetClientDriver_str(netdev->type)); return -1; } } else { if (netdev->type == NET_CLIENT_DRIVER_NONE) { return 0; /* nothing to do */ } - if (netdev->type == NET_CLIENT_DRIVER_HUBPORT || - !net_client_init_fun[netdev->type]) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", - "a net backend type (maybe it is not compiled " - "into this binary)"); + if (netdev->type == NET_CLIENT_DRIVER_HUBPORT) { + error_setg(errp, "network backend '%s' is only supported with -netdev/-nic", + NetClientDriver_str(netdev->type)); + return -1; + } + + 
if (!net_client_init_fun[netdev->type]) { + error_setg(errp, "network backend '%s' is not compiled into this binary", + NetClientDriver_str(netdev->type)); return -1; } From f0c48e05bd7c535aa0614d284d165d998936c79b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 20 Oct 2022 17:52:48 +0200 Subject: [PATCH 07/26] vhost: allocate event_idx fields on vring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There was not enough room to accommodate them. Reviewed-by: Michael S. Tsirkin Signed-off-by: Eugenio Pérez Signed-off-by: Jason Wang --- hw/virtio/vhost-shadow-virtqueue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 596d4434d2..a518f84772 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -570,16 +570,16 @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq) { size_t desc_size = sizeof(vring_desc_t) * svq->vring.num; - size_t avail_size = offsetof(vring_avail_t, ring) + - sizeof(uint16_t) * svq->vring.num; + size_t avail_size = offsetof(vring_avail_t, ring[svq->vring.num]) + + sizeof(uint16_t); return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size()); } size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq) { - size_t used_size = offsetof(vring_used_t, ring) + - sizeof(vring_used_elem_t) * svq->vring.num; + size_t used_size = offsetof(vring_used_t, ring[svq->vring.num]) + + sizeof(uint16_t); return ROUND_UP(used_size, qemu_real_host_page_size()); } From 01f8beacea2a31edb6c270653052bea2778ae65d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 20 Oct 2022 17:52:49 +0200 Subject: [PATCH 08/26] vhost: toggle device callbacks using used event idx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Actually use the new field of the used ring and tell the device if SVQ wants to be notified. The code is not reachable at the moment. Reviewed-by: Michael S. Tsirkin Signed-off-by: Eugenio Pérez Signed-off-by: Jason Wang --- hw/virtio/vhost-shadow-virtqueue.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index a518f84772..f5c0fad3fc 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -369,15 +369,27 @@ static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) */ static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq) { - svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); - /* Make sure the flag is written before the read of used_idx */ + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) { + uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num]; + *used_event = svq->shadow_used_idx; + } else { + svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); + } + + /* Make sure the event is enabled before the read of used_idx */ smp_mb(); return !vhost_svq_more_used(svq); } static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) { - svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); + /* + * No need to disable notification in the event idx case, since used event + * index is already an index too far away. 
+ */ + if (!virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) { + svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); + } } static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, From 22a6840ff282727df3e3c745d92d764c7081f49b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 20 Oct 2022 17:52:50 +0200 Subject: [PATCH 09/26] vhost: use avail event idx on vhost_svq_kick MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So SVQ code knows if an event is needed. The code is not reachable at the moment. Reviewed-by: Michael S. Tsirkin Signed-off-by: Eugenio Pérez Signed-off-by: Jason Wang --- hw/virtio/vhost-shadow-virtqueue.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index f5c0fad3fc..f306ebef72 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -218,12 +218,22 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, static void vhost_svq_kick(VhostShadowVirtqueue *svq) { + bool needs_kick; + /* * We need to expose the available array entries before checking the used * flags */ smp_mb(); - if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { + + if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) { + uint16_t avail_event = *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]); + needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1); + } else { + needs_kick = !(svq->vring.used->flags & VRING_USED_F_NO_NOTIFY); + } + + if (!needs_kick) { return; } From 396d512669dccea2bb982685870356ab71e956ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 20 Oct 2022 17:52:51 +0200 Subject: [PATCH 10/26] vhost: Accept event idx flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enabling all the code 
path created before. Reviewed-by: Michael S. Tsirkin Signed-off-by: Eugenio Pérez Signed-off-by: Jason Wang --- hw/virtio/vhost-shadow-virtqueue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index f306ebef72..5bd14cad96 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -33,6 +33,7 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp) ++b) { switch (b) { case VIRTIO_F_ANY_LAYOUT: + case VIRTIO_RING_F_EVENT_IDX: continue; case VIRTIO_F_ACCESS_PLATFORM: From 30e4226b78f888a75dade5a2cf1488f94da3a8db Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:06 +0200 Subject: [PATCH 11/26] net: introduce convert_host_port() Signed-off-by: Laurent Vivier Reviewed-by: Stefano Brivio Reviewed-by: David Gibson Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- include/qemu/sockets.h | 2 ++ net/net.c | 74 ++++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 036745e586..db4bedb6fa 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -65,6 +65,8 @@ void socket_listen_cleanup(int fd, Error **errp); int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp); /* Old, ipv4 only bits. Don't use for new code. 
*/ +int convert_host_port(struct sockaddr_in *saddr, const char *host, + const char *port, Error **errp); int parse_host_port(struct sockaddr_in *saddr, const char *str, Error **errp); int socket_init(void); diff --git a/net/net.c b/net/net.c index 8ddafacf13..407bdd1f07 100644 --- a/net/net.c +++ b/net/net.c @@ -66,15 +66,47 @@ static QTAILQ_HEAD(, NetClientState) net_clients; /***********************************************************/ /* network device redirectors */ +int convert_host_port(struct sockaddr_in *saddr, const char *host, + const char *port, Error **errp) +{ + struct hostent *he; + const char *r; + long p; + + memset(saddr, 0, sizeof(*saddr)); + + saddr->sin_family = AF_INET; + if (host[0] == '\0') { + saddr->sin_addr.s_addr = 0; + } else { + if (qemu_isdigit(host[0])) { + if (!inet_aton(host, &saddr->sin_addr)) { + error_setg(errp, "host address '%s' is not a valid " + "IPv4 address", host); + return -1; + } + } else { + he = gethostbyname(host); + if (he == NULL) { + error_setg(errp, "can't resolve host address '%s'", host); + return -1; + } + saddr->sin_addr = *(struct in_addr *)he->h_addr; + } + } + if (qemu_strtol(port, &r, 0, &p) != 0) { + error_setg(errp, "port number '%s' is invalid", port); + return -1; + } + saddr->sin_port = htons(p); + return 0; +} + int parse_host_port(struct sockaddr_in *saddr, const char *str, Error **errp) { gchar **substrings; - struct hostent *he; - const char *addr, *p, *r; - int port, ret = 0; - - memset(saddr, 0, sizeof(*saddr)); + int ret; substrings = g_strsplit(str, ":", 2); if (!substrings || !substrings[0] || !substrings[1]) { @@ -84,37 +116,7 @@ int parse_host_port(struct sockaddr_in *saddr, const char *str, goto out; } - addr = substrings[0]; - p = substrings[1]; - - saddr->sin_family = AF_INET; - if (addr[0] == '\0') { - saddr->sin_addr.s_addr = 0; - } else { - if (qemu_isdigit(addr[0])) { - if (!inet_aton(addr, &saddr->sin_addr)) { - error_setg(errp, "host address '%s' is not a valid " - "IPv4 address", 
addr); - ret = -1; - goto out; - } - } else { - he = gethostbyname(addr); - if (he == NULL) { - error_setg(errp, "can't resolve host address '%s'", addr); - ret = -1; - goto out; - } - saddr->sin_addr = *(struct in_addr *)he->h_addr; - } - } - port = strtol(p, (char **)&r, 0); - if (r == p) { - error_setg(errp, "port number '%s' is invalid", p); - ret = -1; - goto out; - } - saddr->sin_port = htons(port); + ret = convert_host_port(saddr, substrings[0], substrings[1], errp); out: g_strfreev(substrings); From d63ef17bfcba1126df6bfcb7bca64c96ac4a8d99 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:07 +0200 Subject: [PATCH 12/26] net: remove the @errp argument of net_client_inits() The only caller passes &error_fatal, so use this directly in the function. It's what we do for -blockdev, -device, and -object. Suggested-by: Markus Armbruster Signed-off-by: Laurent Vivier Reviewed-by: Markus Armbruster Reviewed-by: David Gibson Acked-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- include/net/net.h | 2 +- net/net.c | 20 +++++++------------- softmmu/vl.c | 2 +- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/include/net/net.h b/include/net/net.h index 81d0b21def..c1c34a58f8 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -222,7 +222,7 @@ extern const char *host_net_devices[]; /* from net.c */ int net_client_parse(QemuOptsList *opts_list, const char *str); void show_netdevs(void); -int net_init_clients(Error **errp); +void net_init_clients(void); void net_check_clients(void); void net_cleanup(void); void hmp_host_net_add(Monitor *mon, const QDict *qdict); diff --git a/net/net.c b/net/net.c index 407bdd1f07..5e788802b1 100644 --- a/net/net.c +++ b/net/net.c @@ -1566,27 +1566,21 @@ out: return ret; } -int net_init_clients(Error **errp) +void net_init_clients(void) { net_change_state_entry = qemu_add_vm_change_state_handler(net_vm_change_state_handler, NULL); QTAILQ_INIT(&net_clients); - if 
(qemu_opts_foreach(qemu_find_opts("netdev"), - net_init_netdev, NULL, errp)) { - return -1; - } + qemu_opts_foreach(qemu_find_opts("netdev"), net_init_netdev, NULL, + &error_fatal); - if (qemu_opts_foreach(qemu_find_opts("nic"), net_param_nic, NULL, errp)) { - return -1; - } + qemu_opts_foreach(qemu_find_opts("nic"), net_param_nic, NULL, + &error_fatal); - if (qemu_opts_foreach(qemu_find_opts("net"), net_init_client, NULL, errp)) { - return -1; - } - - return 0; + qemu_opts_foreach(qemu_find_opts("net"), net_init_client, NULL, + &error_fatal); } int net_client_parse(QemuOptsList *opts_list, const char *optarg) diff --git a/softmmu/vl.c b/softmmu/vl.c index b464da25bc..a4ae131e4d 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -1904,7 +1904,7 @@ static void qemu_create_late_backends(void) qtest_server_init(qtest_chrdev, qtest_log, &error_fatal); } - net_init_clients(&error_fatal); + net_init_clients(); object_option_foreach_add(object_create_late); From 21fccb2cbbacdc045f19605915e847de31ca9862 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:08 +0200 Subject: [PATCH 13/26] net: simplify net_client_parse() error management All net_client_parse() callers exit in case of error. Move exit(1) to net_client_parse() and remove error checking from the callers. Suggested-by: Markus Armbruster Signed-off-by: Laurent Vivier Reviewed-by: Markus Armbruster Reviewed-by: David Gibson Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- include/net/net.h | 2 +- net/net.c | 6 ++---- softmmu/vl.c | 12 +++--------- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/include/net/net.h b/include/net/net.h index c1c34a58f8..55023e7e9f 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -220,7 +220,7 @@ extern NICInfo nd_table[MAX_NICS]; extern const char *host_net_devices[]; /* from net.c */ -int net_client_parse(QemuOptsList *opts_list, const char *str); +void net_client_parse(QemuOptsList *opts_list, const char *str); void show_netdevs(void); void net_init_clients(void); void net_check_clients(void); diff --git a/net/net.c b/net/net.c index 5e788802b1..178257abfd 100644 --- a/net/net.c +++ b/net/net.c @@ -1583,13 +1583,11 @@ void net_init_clients(void) &error_fatal); } -int net_client_parse(QemuOptsList *opts_list, const char *optarg) +void net_client_parse(QemuOptsList *opts_list, const char *optarg) { if (!qemu_opts_parse_noisily(opts_list, optarg, true)) { - return -1; + exit(1); } - - return 0; } /* From FreeBSD */ diff --git a/softmmu/vl.c b/softmmu/vl.c index a4ae131e4d..e69aa43de4 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -2801,21 +2801,15 @@ void qemu_init(int argc, char **argv) break; case QEMU_OPTION_netdev: default_net = 0; - if (net_client_parse(qemu_find_opts("netdev"), optarg) == -1) { - exit(1); - } + net_client_parse(qemu_find_opts("netdev"), optarg); break; case QEMU_OPTION_nic: default_net = 0; - if (net_client_parse(qemu_find_opts("nic"), optarg) == -1) { - exit(1); - } + net_client_parse(qemu_find_opts("nic"), optarg); break; case QEMU_OPTION_net: default_net = 0; - if (net_client_parse(qemu_find_opts("net"), optarg) == -1) { - exit(1); - } + net_client_parse(qemu_find_opts("net"), optarg); break; #ifdef CONFIG_LIBISCSI case QEMU_OPTION_iscsi: From f3eedcddba36dddfb7769a8c96a0f710e894ffc0 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:09 +0200 Subject: [PATCH 14/26] qapi: net: introduce a way 
to bypass qemu_opts_parse_noisily() As qemu_opts_parse_noisily() flattens the QAPI structures ("type" field of Netdev structure can collide with "type" field of SocketAddress), we introduce a way to bypass qemu_opts_parse_noisily() and use visit_type_Netdev() directly to parse the backend parameters. More details from Markus: qemu_init() passes the argument of -netdev, -nic, and -net to net_client_parse(). net_client_parse() parses with qemu_opts_parse_noisily(), passing QemuOptsList qemu_netdev_opts for -netdev, qemu_nic_opts for -nic, and qemu_net_opts for -net. Their desc[] are all empty, which means any keys are accepted. The result of the parse (a QemuOpts) is stored in the QemuOptsList. Note that QemuOpts is flat by design. In some places, we layer non-flat on top using dotted keys convention, but not here. net_init_clients() iterates over the stored QemuOpts, and passes them to net_init_netdev(), net_param_nic(), or net_init_client(), respectively. These functions pass the QemuOpts to net_client_init(). They also do other things with the QemuOpts, which we can ignore here. net_client_init() uses the opts visitor to convert the (flat) QemuOpts to a (non-flat) QAPI object Netdev. Netdev is also the argument of QMP command netdev_add. The opts visitor was an early attempt to support QAPI in (QemuOpts-based) CLI. It restricts QAPI types to a certain shape; see commit eb7ee2cbeb "qapi: introduce OptsVisitor". A more modern way to support QAPI is qobject_input_visitor_new_str(). It uses keyval_parse() instead of QemuOpts for KEY=VALUE,... syntax, and it also supports JSON syntax. The former isn't quite as expressive as JSON, but it's a lot closer than QemuOpts + opts visitor. This commit paves the way to using the modern way instead. Signed-off-by: Laurent Vivier Reviewed-by: Markus Armbruster Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- include/net/net.h | 2 ++ net/net.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++ softmmu/vl.c | 6 ++++- 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/include/net/net.h b/include/net/net.h index 55023e7e9f..025dbf1e14 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -220,6 +220,8 @@ extern NICInfo nd_table[MAX_NICS]; extern const char *host_net_devices[]; /* from net.c */ +bool netdev_is_modern(const char *optarg); +void netdev_parse_modern(const char *optarg); void net_client_parse(QemuOptsList *opts_list, const char *str); void show_netdevs(void); void net_init_clients(void); diff --git a/net/net.c b/net/net.c index 178257abfd..128a90f1fd 100644 --- a/net/net.c +++ b/net/net.c @@ -54,6 +54,7 @@ #include "net/colo-compare.h" #include "net/filter.h" #include "qapi/string-output-visitor.h" +#include "qapi/qobject-input-visitor.h" /* Net bridge is currently not supported for W32. */ #if !defined(_WIN32) @@ -63,6 +64,16 @@ static VMChangeStateEntry *net_change_state_entry; static QTAILQ_HEAD(, NetClientState) net_clients; +typedef struct NetdevQueueEntry { + Netdev *nd; + Location loc; + QSIMPLEQ_ENTRY(NetdevQueueEntry) entry; +} NetdevQueueEntry; + +typedef QSIMPLEQ_HEAD(, NetdevQueueEntry) NetdevQueue; + +static NetdevQueue nd_queue = QSIMPLEQ_HEAD_INITIALIZER(nd_queue); + /***********************************************************/ /* network device redirectors */ @@ -1566,6 +1577,20 @@ out: return ret; } +static void netdev_init_modern(void) +{ + while (!QSIMPLEQ_EMPTY(&nd_queue)) { + NetdevQueueEntry *nd = QSIMPLEQ_FIRST(&nd_queue); + + QSIMPLEQ_REMOVE_HEAD(&nd_queue, entry); + loc_push_restore(&nd->loc); + net_client_init1(nd->nd, true, &error_fatal); + loc_pop(&nd->loc); + qapi_free_Netdev(nd->nd); + g_free(nd); + } +} + void net_init_clients(void) { net_change_state_entry = @@ -1573,6 +1598,8 @@ void net_init_clients(void) QTAILQ_INIT(&net_clients); + netdev_init_modern(); + 
qemu_opts_foreach(qemu_find_opts("netdev"), net_init_netdev, NULL, &error_fatal); @@ -1583,6 +1610,36 @@ void net_init_clients(void) &error_fatal); } +/* + * Does this -netdev argument use modern rather than traditional syntax? + * Modern syntax is to be parsed with netdev_parse_modern(). + * Traditional syntax is to be parsed with net_client_parse(). + */ +bool netdev_is_modern(const char *optarg) +{ + return false; +} + +/* + * netdev_parse_modern() uses modern, more expressive syntax than + * net_client_parse(), but supports only the -netdev option. + * netdev_parse_modern() appends to @nd_queue, whereas net_client_parse() + * appends to @qemu_netdev_opts. + */ +void netdev_parse_modern(const char *optarg) +{ + Visitor *v; + NetdevQueueEntry *nd; + + v = qobject_input_visitor_new_str(optarg, "type", &error_fatal); + nd = g_new(NetdevQueueEntry, 1); + visit_type_Netdev(v, NULL, &nd->nd, &error_fatal); + visit_free(v); + loc_save(&nd->loc); + + QSIMPLEQ_INSERT_TAIL(&nd_queue, nd, entry); +} + void net_client_parse(QemuOptsList *opts_list, const char *optarg) { if (!qemu_opts_parse_noisily(opts_list, optarg, true)) { diff --git a/softmmu/vl.c b/softmmu/vl.c index e69aa43de4..99fb49c7b0 100644 --- a/softmmu/vl.c +++ b/softmmu/vl.c @@ -2801,7 +2801,11 @@ void qemu_init(int argc, char **argv) break; case QEMU_OPTION_netdev: default_net = 0; - net_client_parse(qemu_find_opts("netdev"), optarg); + if (netdev_is_modern(optarg)) { + netdev_parse_modern(optarg); + } else { + net_client_parse(qemu_find_opts("netdev"), optarg); + } break; case QEMU_OPTION_nic: default_net = 0; From 53b85d9574f3c162c0aadbd988f95e207d4c6e31 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:10 +0200 Subject: [PATCH 15/26] net: introduce qemu_set_info_str() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Embed the setting of info_str in a function. 
Signed-off-by: Laurent Vivier Reviewed-by: David Gibson Acked-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Jason Wang --- hw/net/xen_nic.c | 5 ++--- include/net/net.h | 1 + net/l2tpv3.c | 3 +-- net/net.c | 17 ++++++++++++----- net/slirp.c | 5 ++--- net/socket.c | 33 ++++++++++++++------------------- net/tap-win32.c | 3 +-- net/tap.c | 13 +++++-------- net/vde.c | 3 +-- net/vhost-user.c | 3 +-- net/vhost-vdpa.c | 2 +- 11 files changed, 41 insertions(+), 47 deletions(-) diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 5c815b4f0c..7d92c2d022 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -296,9 +296,8 @@ static int net_init(struct XenLegacyDevice *xendev) netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf, "xen", NULL, netdev); - snprintf(qemu_get_queue(netdev->nic)->info_str, - sizeof(qemu_get_queue(netdev->nic)->info_str), - "nic: xenbus vif macaddr=%s", netdev->mac); + qemu_set_info_str(qemu_get_queue(netdev->nic), + "nic: xenbus vif macaddr=%s", netdev->mac); /* fill info */ xenstore_write_be_int(&netdev->xendev, "feature-rx-copy", 1); diff --git a/include/net/net.h b/include/net/net.h index 025dbf1e14..3db75ff841 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -177,6 +177,7 @@ ssize_t qemu_send_packet_async(NetClientState *nc, const uint8_t *buf, void qemu_purge_queued_packets(NetClientState *nc); void qemu_flush_queued_packets(NetClientState *nc); void qemu_flush_or_purge_queued_packets(NetClientState *nc, bool purge); +void qemu_set_info_str(NetClientState *nc, const char *fmt, ...); void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]); bool qemu_has_ufo(NetClientState *nc); bool qemu_has_vnet_hdr(NetClientState *nc); diff --git a/net/l2tpv3.c b/net/l2tpv3.c index af373e5c30..350041a0d6 100644 --- a/net/l2tpv3.c +++ b/net/l2tpv3.c @@ -723,8 +723,7 @@ int net_init_l2tpv3(const Netdev *netdev, l2tpv3_read_poll(s, true); - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "l2tpv3: 
connected"); + qemu_set_info_str(&s->nc, "l2tpv3: connected"); return 0; outerr: qemu_del_net_client(nc); diff --git a/net/net.c b/net/net.c index 128a90f1fd..f29a59cd7f 100644 --- a/net/net.c +++ b/net/net.c @@ -141,13 +141,20 @@ char *qemu_mac_strdup_printf(const uint8_t *macaddr) macaddr[3], macaddr[4], macaddr[5]); } +void qemu_set_info_str(NetClientState *nc, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vsnprintf(nc->info_str, sizeof(nc->info_str), fmt, ap); + va_end(ap); +} + void qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]) { - snprintf(nc->info_str, sizeof(nc->info_str), - "model=%s,macaddr=%02x:%02x:%02x:%02x:%02x:%02x", - nc->model, - macaddr[0], macaddr[1], macaddr[2], - macaddr[3], macaddr[4], macaddr[5]); + qemu_set_info_str(nc, "model=%s,macaddr=%02x:%02x:%02x:%02x:%02x:%02x", + nc->model, macaddr[0], macaddr[1], macaddr[2], + macaddr[3], macaddr[4], macaddr[5]); } static int mac_table[256] = {0}; diff --git a/net/slirp.c b/net/slirp.c index 8679be6444..14a8d59277 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -611,9 +611,8 @@ static int net_slirp_init(NetClientState *peer, const char *model, nc = qemu_new_net_client(&net_slirp_info, peer, model, name); - snprintf(nc->info_str, sizeof(nc->info_str), - "net=%s,restrict=%s", inet_ntoa(net), - restricted ? "on" : "off"); + qemu_set_info_str(nc, "net=%s,restrict=%s", inet_ntoa(net), + restricted ? 
"on" : "off"); s = DO_UPCAST(SlirpState, nc, nc); diff --git a/net/socket.c b/net/socket.c index bfd8596250..ade1ecf38b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -179,7 +179,7 @@ static void net_socket_send(void *opaque) s->fd = -1; net_socket_rs_init(&s->rs, net_socket_rs_finalize, false); s->nc.link_down = true; - memset(s->nc.info_str, 0, sizeof(s->nc.info_str)); + qemu_set_info_str(&s->nc, ""); return; } @@ -387,16 +387,15 @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, /* mcast: save bound address as dst */ if (is_connected && mcast != NULL) { s->dgram_dst = saddr; - snprintf(nc->info_str, sizeof(nc->info_str), - "socket: fd=%d (cloned mcast=%s:%d)", - fd, inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + qemu_set_info_str(nc, "socket: fd=%d (cloned mcast=%s:%d)", fd, + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); } else { if (sa_type == SOCKET_ADDRESS_TYPE_UNIX) { s->dgram_dst.sin_family = AF_UNIX; } - snprintf(nc->info_str, sizeof(nc->info_str), - "socket: fd=%d %s", fd, SocketAddressType_str(sa_type)); + qemu_set_info_str(nc, "socket: fd=%d %s", fd, + SocketAddressType_str(sa_type)); } return s; @@ -430,7 +429,7 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, nc = qemu_new_net_client(&net_socket_info, peer, model, name); - snprintf(nc->info_str, sizeof(nc->info_str), "socket: fd=%d", fd); + qemu_set_info_str(nc, "socket: fd=%d", fd); s = DO_UPCAST(NetSocketState, nc, nc); @@ -497,9 +496,8 @@ static void net_socket_accept(void *opaque) s->fd = fd; s->nc.link_down = false; net_socket_connect(s); - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "socket: connection from %s:%d", - inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + qemu_set_info_str(&s->nc, "socket: connection from %s:%d", + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); } static int net_socket_listen_init(NetClientState *peer, @@ -597,9 +595,8 @@ static int net_socket_connect_init(NetClientState *peer, return -1; } - 
snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "socket: connect to %s:%d", - inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + qemu_set_info_str(&s->nc, "socket: connect to %s:%d", + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); return 0; } @@ -642,9 +639,8 @@ static int net_socket_mcast_init(NetClientState *peer, s->dgram_dst = saddr; - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "socket: mcast=%s:%d", - inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + qemu_set_info_str(&s->nc, "socket: mcast=%s:%d", + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); return 0; } @@ -697,9 +693,8 @@ static int net_socket_udp_init(NetClientState *peer, s->dgram_dst = raddr; - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "socket: udp=%s:%d", - inet_ntoa(raddr.sin_addr), ntohs(raddr.sin_port)); + qemu_set_info_str(&s->nc, "socket: udp=%s:%d", inet_ntoa(raddr.sin_addr), + ntohs(raddr.sin_port)); return 0; } diff --git a/net/tap-win32.c b/net/tap-win32.c index 7466f22e77..a49c28ba5d 100644 --- a/net/tap-win32.c +++ b/net/tap-win32.c @@ -789,8 +789,7 @@ static int tap_win32_init(NetClientState *peer, const char *model, s = DO_UPCAST(TAPState, nc, nc); - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "tap: ifname=%s", ifname); + qemu_set_info_str(&s->nc, "tap: ifname=%s", ifname); s->handle = handle; diff --git a/net/tap.c b/net/tap.c index e203d07a12..1210a0436d 100644 --- a/net/tap.c +++ b/net/tap.c @@ -630,8 +630,7 @@ int net_init_bridge(const Netdev *netdev, const char *name, } s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); - snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper, - br); + qemu_set_info_str(&s->nc, "helper=%s,br=%s", helper, br); return 0; } @@ -690,14 +689,12 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, } if (tap->has_fd || tap->has_fds) { - snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd); + qemu_set_info_str(&s->nc, "fd=%d", fd); } else if 
(tap->has_helper) { - snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s", - tap->helper); + qemu_set_info_str(&s->nc, "helper=%s", tap->helper); } else { - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "ifname=%s,script=%s,downscript=%s", ifname, script, - downscript); + qemu_set_info_str(&s->nc, "ifname=%s,script=%s,downscript=%s", ifname, + script, downscript); if (strcmp(downscript, "no") != 0) { snprintf(s->down_script, sizeof(s->down_script), "%s", downscript); diff --git a/net/vde.c b/net/vde.c index 1083916bcf..c0a08662cc 100644 --- a/net/vde.c +++ b/net/vde.c @@ -98,8 +98,7 @@ static int net_vde_init(NetClientState *peer, const char *model, nc = qemu_new_net_client(&net_vde_info, peer, model, name); - snprintf(nc->info_str, sizeof(nc->info_str), "sock=%s,fd=%d", - sock, vde_datafd(vde)); + qemu_set_info_str(nc, "sock=%s,fd=%d", sock, vde_datafd(vde)); s = DO_UPCAST(VDEState, nc, nc); diff --git a/net/vhost-user.c b/net/vhost-user.c index b1a0247b59..3a6b90da86 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -341,8 +341,7 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, user = g_new0(struct VhostUserState, 1); for (i = 0; i < queues; i++) { nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); - snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s", - i, chr->label); + qemu_set_info_str(nc, "vhost-user%d to %s", i, chr->label); nc->queue_index = i; if (!nc0) { nc0 = nc; diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 9c1b25d782..854ebd61ae 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -544,7 +544,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, device, name); } - snprintf(nc->info_str, sizeof(nc->info_str), TYPE_VHOST_VDPA); + qemu_set_info_str(nc, TYPE_VHOST_VDPA); s = DO_UPCAST(VhostVDPAState, nc, nc); s->vhost_vdpa.device_fd = vdpa_device_fd; From 
5166fe0ae46dbfed8cd7e7c3743c591caef81336 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:11 +0200 Subject: [PATCH 16/26] qapi: net: add stream and dgram netdevs Copied from socket netdev file and modified to use SocketAddress to be able to introduce new features like Unix sockets. "udp" and "mcast" are squashed into dgram netdev; multicast is detected according to the IP address type. "listen" and "connect" modes are managed by stream netdev. An optional parameter "server" defines the mode (off by default). The two new types need to be parsed the modern way with -netdev, because with the traditional way, the "type" field of netdev structure collides with the "type" field of SocketAddress and prevents the correct evaluation of the command line option. Moreover, the traditional way doesn't allow using the same type (SocketAddress) several times with the -netdev option (needed to specify "local" and "remote" addresses). The previous commit paved the way for parsing the modern way, but omitted one detail: how to pick modern vs. traditional, in netdev_is_modern(). We want to pick based on the value of parameter "type". But how to extract it from the option argument? Parsing the option argument, either the modern or the traditional way, extracts it for us, but only if parsing succeeds. If parsing fails, there is no good option. No matter which parser we pick, it'll be the wrong one for some arguments, and the error reporting will be confusing. Fortunately, the traditional parser accepts *anything* when called in a certain way. This maximizes our chance to extract the value of "type", and in turn minimizes the risk of confusing error reporting. Signed-off-by: Laurent Vivier Reviewed-by: Stefano Brivio Acked-by: Markus Armbruster Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- hmp-commands.hx | 2 +- net/clients.h | 6 + net/dgram.c | 537 ++++++++++++++++++++++++++++++++++++++++++++++++ net/hub.c | 2 + net/meson.build | 2 + net/net.c | 30 ++- net/stream.c | 425 ++++++++++++++++++++++++++++++++++++++ qapi/net.json | 66 +++++- qemu-options.hx | 12 ++ 9 files changed, 1078 insertions(+), 4 deletions(-) create mode 100644 net/dgram.c create mode 100644 net/stream.c diff --git a/hmp-commands.hx b/hmp-commands.hx index 12b6d4e2dc..673e39a697 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1276,7 +1276,7 @@ ERST { .name = "netdev_add", .args_type = "netdev:O", - .params = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user" + .params = "[user|tap|socket|stream|dgram|vde|bridge|hubport|netmap|vhost-user" #ifdef CONFIG_VMNET "|vmnet-host|vmnet-shared|vmnet-bridged" #endif diff --git a/net/clients.h b/net/clients.h index c9157789f2..ed8bdfff1e 100644 --- a/net/clients.h +++ b/net/clients.h @@ -40,6 +40,12 @@ int net_init_hubport(const Netdev *netdev, const char *name, int net_init_socket(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); +int net_init_stream(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp); + +int net_init_dgram(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp); + int net_init_tap(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); diff --git a/net/dgram.c b/net/dgram.c new file mode 100644 index 0000000000..5339585b82 --- /dev/null +++ b/net/dgram.c @@ -0,0 +1,537 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2022 Red Hat, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include "net/net.h" +#include "clients.h" +#include "monitor/monitor.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/sockets.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" +#include "qemu/cutils.h" + +typedef struct NetDgramState { + NetClientState nc; + int fd; + SocketReadState rs; + struct sockaddr_in dgram_dst; /* contains destination iff connectionless */ + bool read_poll; /* waiting to receive data? */ + bool write_poll; /* waiting to transmit data? */ +} NetDgramState; + +static void net_dgram_send(void *opaque); +static void net_dgram_writable(void *opaque); + +static void net_dgram_update_fd_handler(NetDgramState *s) +{ + qemu_set_fd_handler(s->fd, + s->read_poll ? net_dgram_send : NULL, + s->write_poll ? 
net_dgram_writable : NULL, + s); +} + +static void net_dgram_read_poll(NetDgramState *s, bool enable) +{ + s->read_poll = enable; + net_dgram_update_fd_handler(s); +} + +static void net_dgram_write_poll(NetDgramState *s, bool enable) +{ + s->write_poll = enable; + net_dgram_update_fd_handler(s); +} + +static void net_dgram_writable(void *opaque) +{ + NetDgramState *s = opaque; + + net_dgram_write_poll(s, false); + + qemu_flush_queued_packets(&s->nc); +} + +static ssize_t net_dgram_receive(NetClientState *nc, + const uint8_t *buf, size_t size) +{ + NetDgramState *s = DO_UPCAST(NetDgramState, nc, nc); + ssize_t ret; + + do { + if (s->dgram_dst.sin_family != AF_UNIX) { + ret = sendto(s->fd, buf, size, 0, + (struct sockaddr *)&s->dgram_dst, + sizeof(s->dgram_dst)); + } else { + ret = send(s->fd, buf, size, 0); + } + } while (ret == -1 && errno == EINTR); + + if (ret == -1 && errno == EAGAIN) { + net_dgram_write_poll(s, true); + return 0; + } + return ret; +} + +static void net_dgram_send_completed(NetClientState *nc, ssize_t len) +{ + NetDgramState *s = DO_UPCAST(NetDgramState, nc, nc); + + if (!s->read_poll) { + net_dgram_read_poll(s, true); + } +} + +static void net_dgram_rs_finalize(SocketReadState *rs) +{ + NetDgramState *s = container_of(rs, NetDgramState, rs); + + if (qemu_send_packet_async(&s->nc, rs->buf, + rs->packet_len, + net_dgram_send_completed) == 0) { + net_dgram_read_poll(s, false); + } +} + +static void net_dgram_send(void *opaque) +{ + NetDgramState *s = opaque; + int size; + + size = recv(s->fd, s->rs.buf, sizeof(s->rs.buf), 0); + if (size < 0) { + return; + } + if (size == 0) { + /* end of connection */ + net_dgram_read_poll(s, false); + net_dgram_write_poll(s, false); + return; + } + if (qemu_send_packet_async(&s->nc, s->rs.buf, size, + net_dgram_send_completed) == 0) { + net_dgram_read_poll(s, false); + } +} + +static int net_dgram_mcast_create(struct sockaddr_in *mcastaddr, + struct in_addr *localaddr, + Error **errp) +{ + struct ip_mreq imr; + 
int fd; + int val, ret; +#ifdef __OpenBSD__ + unsigned char loop; +#else + int loop; +#endif + + if (!IN_MULTICAST(ntohl(mcastaddr->sin_addr.s_addr))) { + error_setg(errp, "specified mcastaddr %s (0x%08x) " + "does not contain a multicast address", + inet_ntoa(mcastaddr->sin_addr), + (int)ntohl(mcastaddr->sin_addr.s_addr)); + return -1; + } + + fd = qemu_socket(PF_INET, SOCK_DGRAM, 0); + if (fd < 0) { + error_setg_errno(errp, errno, "can't create datagram socket"); + return -1; + } + + /* + * Allow multiple sockets to bind the same multicast ip and port by setting + * SO_REUSEADDR. This is the only situation where SO_REUSEADDR should be set + * on windows. Use socket_set_fast_reuse otherwise as it sets SO_REUSEADDR + * only on posix systems. + */ + val = 1; + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val)); + if (ret < 0) { + error_setg_errno(errp, errno, "can't set socket option SO_REUSEADDR"); + goto fail; + } + + ret = bind(fd, (struct sockaddr *)mcastaddr, sizeof(*mcastaddr)); + if (ret < 0) { + error_setg_errno(errp, errno, "can't bind ip=%s to socket", + inet_ntoa(mcastaddr->sin_addr)); + goto fail; + } + + /* Add host to multicast group */ + imr.imr_multiaddr = mcastaddr->sin_addr; + if (localaddr) { + imr.imr_interface = *localaddr; + } else { + imr.imr_interface.s_addr = htonl(INADDR_ANY); + } + + ret = setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, + &imr, sizeof(struct ip_mreq)); + if (ret < 0) { + error_setg_errno(errp, errno, + "can't add socket to multicast group %s", + inet_ntoa(imr.imr_multiaddr)); + goto fail; + } + + /* Force mcast msgs to loopback (eg. 
several QEMUs in same host */ + loop = 1; + ret = setsockopt(fd, IPPROTO_IP, IP_MULTICAST_LOOP, + &loop, sizeof(loop)); + if (ret < 0) { + error_setg_errno(errp, errno, + "can't force multicast message to loopback"); + goto fail; + } + + /* If a bind address is given, only send packets from that address */ + if (localaddr != NULL) { + ret = setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF, + localaddr, sizeof(*localaddr)); + if (ret < 0) { + error_setg_errno(errp, errno, + "can't set the default network send interface"); + goto fail; + } + } + + qemu_socket_set_nonblock(fd); + return fd; +fail: + if (fd >= 0) { + closesocket(fd); + } + return -1; +} + +static void net_dgram_cleanup(NetClientState *nc) +{ + NetDgramState *s = DO_UPCAST(NetDgramState, nc, nc); + if (s->fd != -1) { + net_dgram_read_poll(s, false); + net_dgram_write_poll(s, false); + close(s->fd); + s->fd = -1; + } +} + +static NetClientInfo net_dgram_socket_info = { + .type = NET_CLIENT_DRIVER_DGRAM, + .size = sizeof(NetDgramState), + .receive = net_dgram_receive, + .cleanup = net_dgram_cleanup, +}; + +static NetDgramState *net_dgram_fd_init(NetClientState *peer, + const char *model, + const char *name, + int fd, int is_fd, + SocketAddress *mcast, + Error **errp) +{ + struct sockaddr_in saddr; + int newfd; + NetClientState *nc; + NetDgramState *s; + SocketAddress *sa; + SocketAddressType sa_type; + + sa = socket_local_address(fd, errp); + if (!sa) { + return NULL; + } + sa_type = sa->type; + qapi_free_SocketAddress(sa); + + /* + * fd passed: multicast: "learn" dgram_dst address from bound address and + * save it. 
Because this may be "shared" socket from a "master" process, + * datagrams would be recv() by ONLY ONE process: we must "clone" this + * dgram socket --jjo + */ + + if (is_fd && mcast != NULL) { + if (convert_host_port(&saddr, mcast->u.inet.host, + mcast->u.inet.port, errp) < 0) { + goto err; + } + /* must be bound */ + if (saddr.sin_addr.s_addr == 0) { + error_setg(errp, "can't setup multicast destination address"); + goto err; + } + /* clone dgram socket */ + newfd = net_dgram_mcast_create(&saddr, NULL, errp); + if (newfd < 0) { + goto err; + } + /* clone newfd to fd, close newfd */ + dup2(newfd, fd); + close(newfd); + + } + + nc = qemu_new_net_client(&net_dgram_socket_info, peer, model, name); + + s = DO_UPCAST(NetDgramState, nc, nc); + + s->fd = fd; + net_socket_rs_init(&s->rs, net_dgram_rs_finalize, false); + net_dgram_read_poll(s, true); + + /* mcast: save bound address as dst */ + if (is_fd && mcast != NULL) { + s->dgram_dst = saddr; + qemu_set_info_str(nc, "fd=%d (cloned mcast=%s:%d)", fd, + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + } else { + if (sa_type == SOCKET_ADDRESS_TYPE_UNIX) { + s->dgram_dst.sin_family = AF_UNIX; + } + + qemu_set_info_str(nc, "fd=%d %s", fd, SocketAddressType_str(sa_type)); + } + + return s; + +err: + closesocket(fd); + return NULL; +} + +static int net_dgram_mcast_init(NetClientState *peer, + const char *model, + const char *name, + SocketAddress *remote, + SocketAddress *local, + Error **errp) +{ + NetDgramState *s; + int fd, ret; + struct sockaddr_in saddr; + + if (remote->type != SOCKET_ADDRESS_TYPE_INET) { + error_setg(errp, "multicast only support inet type"); + return -1; + } + + if (convert_host_port(&saddr, remote->u.inet.host, remote->u.inet.port, + errp) < 0) { + return -1; + } + + if (!local) { + fd = net_dgram_mcast_create(&saddr, NULL, errp); + if (fd < 0) { + return -1; + } + } else { + switch (local->type) { + case SOCKET_ADDRESS_TYPE_INET: { + struct in_addr localaddr; + + if 
(inet_aton(local->u.inet.host, &localaddr) == 0) { + error_setg(errp, "localaddr '%s' is not a valid IPv4 address", + local->u.inet.host); + return -1; + } + + fd = net_dgram_mcast_create(&saddr, &localaddr, errp); + if (fd < 0) { + return -1; + } + break; + } + case SOCKET_ADDRESS_TYPE_FD: + fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp); + if (fd == -1) { + return -1; + } + ret = qemu_socket_try_set_nonblock(fd); + if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", + name, fd); + return -1; + } + break; + default: + error_setg(errp, "only support inet or fd type for local"); + return -1; + } + } + + s = net_dgram_fd_init(peer, model, name, fd, + local->type == SOCKET_ADDRESS_TYPE_FD, + remote, errp); + if (!s) { + return -1; + } + + s->dgram_dst = saddr; + + qemu_set_info_str(&s->nc, "mcast=%s:%d", inet_ntoa(saddr.sin_addr), + ntohs(saddr.sin_port)); + return 0; + +} + + +int net_init_dgram(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) +{ + NetDgramState *s; + int fd, ret; + struct sockaddr_in raddr_in; + struct sockaddr_in laddr_in; + SocketAddress *remote, *local; + + assert(netdev->type == NET_CLIENT_DRIVER_DGRAM); + + remote = netdev->u.dgram.remote; + local = netdev->u.dgram.local; + + /* detect multicast address */ + if (remote && remote->type == SOCKET_ADDRESS_TYPE_INET) { + struct sockaddr_in mcastaddr; + + if (convert_host_port(&mcastaddr, remote->u.inet.host, + remote->u.inet.port, errp) < 0) { + return -1; + } + + if (IN_MULTICAST(ntohl(mcastaddr.sin_addr.s_addr))) { + return net_dgram_mcast_init(peer, "dram", name, remote, local, + errp); + } + } + + /* unicast address */ + if (!local) { + error_setg(errp, "dgram requires local= parameter"); + return -1; + } + + if (remote) { + if (local->type == SOCKET_ADDRESS_TYPE_FD) { + error_setg(errp, "don't set remote with local.fd"); + return -1; + } + if (remote->type != local->type) { + error_setg(errp, "remote and local types must 
be the same"); + return -1; + } + } else { + if (local->type != SOCKET_ADDRESS_TYPE_FD) { + error_setg(errp, "type=inet requires remote parameter"); + return -1; + } + } + + switch (local->type) { + case SOCKET_ADDRESS_TYPE_INET: + if (convert_host_port(&laddr_in, local->u.inet.host, local->u.inet.port, + errp) < 0) { + return -1; + } + + if (convert_host_port(&raddr_in, remote->u.inet.host, + remote->u.inet.port, errp) < 0) { + return -1; + } + + fd = qemu_socket(PF_INET, SOCK_DGRAM, 0); + if (fd < 0) { + error_setg_errno(errp, errno, "can't create datagram socket"); + return -1; + } + + ret = socket_set_fast_reuse(fd); + if (ret < 0) { + error_setg_errno(errp, errno, + "can't set socket option SO_REUSEADDR"); + closesocket(fd); + return -1; + } + ret = bind(fd, (struct sockaddr *)&laddr_in, sizeof(laddr_in)); + if (ret < 0) { + error_setg_errno(errp, errno, "can't bind ip=%s to socket", + inet_ntoa(laddr_in.sin_addr)); + closesocket(fd); + return -1; + } + qemu_socket_set_nonblock(fd); + break; + case SOCKET_ADDRESS_TYPE_FD: + fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp); + if (fd == -1) { + return -1; + } + ret = qemu_socket_try_set_nonblock(fd); + if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", + name, fd); + return -1; + } + break; + default: + error_setg(errp, "only support inet or fd type for local"); + return -1; + } + + s = net_dgram_fd_init(peer, "dgram", name, fd, 0, NULL, errp); + if (!s) { + return -1; + } + + if (remote) { + s->dgram_dst = raddr_in; + } + + switch (local->type) { + case SOCKET_ADDRESS_TYPE_INET: + qemu_set_info_str(&s->nc, "udp=%s:%d/%s:%d", + inet_ntoa(laddr_in.sin_addr), + ntohs(laddr_in.sin_port), + inet_ntoa(raddr_in.sin_addr), + ntohs(raddr_in.sin_port)); + break; + case SOCKET_ADDRESS_TYPE_FD: + qemu_set_info_str(&s->nc, "fd=%d", fd); + break; + default: + g_assert_not_reached(); + } + + return 0; +} diff --git a/net/hub.c b/net/hub.c index 1375738bf1..67ca534856 100644 --- 
a/net/hub.c +++ b/net/hub.c @@ -313,6 +313,8 @@ void net_hub_check_clients(void) case NET_CLIENT_DRIVER_USER: case NET_CLIENT_DRIVER_TAP: case NET_CLIENT_DRIVER_SOCKET: + case NET_CLIENT_DRIVER_STREAM: + case NET_CLIENT_DRIVER_DGRAM: case NET_CLIENT_DRIVER_VDE: case NET_CLIENT_DRIVER_VHOST_USER: has_host_dev = 1; diff --git a/net/meson.build b/net/meson.build index d1be76daf3..6cd1e3dab3 100644 --- a/net/meson.build +++ b/net/meson.build @@ -13,6 +13,8 @@ softmmu_ss.add(files( 'net.c', 'queue.c', 'socket.c', + 'stream.c', + 'dgram.c', 'util.c', )) diff --git a/net/net.c b/net/net.c index f29a59cd7f..840ad9dca5 100644 --- a/net/net.c +++ b/net/net.c @@ -48,6 +48,7 @@ #include "qemu/qemu-print.h" #include "qemu/main-loop.h" #include "qemu/option.h" +#include "qemu/keyval.h" #include "qapi/error.h" #include "qapi/opts-visitor.h" #include "sysemu/runstate.h" @@ -1021,6 +1022,8 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])( #endif [NET_CLIENT_DRIVER_TAP] = net_init_tap, [NET_CLIENT_DRIVER_SOCKET] = net_init_socket, + [NET_CLIENT_DRIVER_STREAM] = net_init_stream, + [NET_CLIENT_DRIVER_DGRAM] = net_init_dgram, #ifdef CONFIG_VDE [NET_CLIENT_DRIVER_VDE] = net_init_vde, #endif @@ -1112,6 +1115,8 @@ void show_netdevs(void) int idx; const char *available_netdevs[] = { "socket", + "stream", + "dgram", "hubport", "tap", #ifdef CONFIG_SLIRP @@ -1624,7 +1629,30 @@ void net_init_clients(void) */ bool netdev_is_modern(const char *optarg) { - return false; + QemuOpts *opts; + bool is_modern; + const char *type; + static QemuOptsList dummy_opts = { + .name = "netdev", + .implied_opt_name = "type", + .head = QTAILQ_HEAD_INITIALIZER(dummy_opts.head), + .desc = { { } }, + }; + + if (optarg[0] == '{') { + /* This is JSON, which means it's modern syntax */ + return true; + } + + opts = qemu_opts_create(&dummy_opts, NULL, false, &error_abort); + qemu_opts_do_parse(opts, optarg, dummy_opts.implied_opt_name, + &error_abort); + type = qemu_opt_get(opts, "type"); + 
is_modern = !g_strcmp0(type, "stream") || !g_strcmp0(type, "dgram"); + + qemu_opts_reset(&dummy_opts); + + return is_modern; } /* diff --git a/net/stream.c b/net/stream.c new file mode 100644 index 0000000000..0a7e84749a --- /dev/null +++ b/net/stream.c @@ -0,0 +1,425 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2022 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" + +#include "net/net.h" +#include "clients.h" +#include "monitor/monitor.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/sockets.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" +#include "qemu/cutils.h" + +typedef struct NetStreamState { + NetClientState nc; + int listen_fd; + int fd; + SocketReadState rs; + unsigned int send_index; /* number of bytes sent*/ + bool read_poll; /* waiting to receive data? 
*/ + bool write_poll; /* waiting to transmit data? */ +} NetStreamState; + +static void net_stream_send(void *opaque); +static void net_stream_accept(void *opaque); +static void net_stream_writable(void *opaque); + +static void net_stream_update_fd_handler(NetStreamState *s) +{ + qemu_set_fd_handler(s->fd, + s->read_poll ? net_stream_send : NULL, + s->write_poll ? net_stream_writable : NULL, + s); +} + +static void net_stream_read_poll(NetStreamState *s, bool enable) +{ + s->read_poll = enable; + net_stream_update_fd_handler(s); +} + +static void net_stream_write_poll(NetStreamState *s, bool enable) +{ + s->write_poll = enable; + net_stream_update_fd_handler(s); +} + +static void net_stream_writable(void *opaque) +{ + NetStreamState *s = opaque; + + net_stream_write_poll(s, false); + + qemu_flush_queued_packets(&s->nc); +} + +static ssize_t net_stream_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); + uint32_t len = htonl(size); + struct iovec iov[] = { + { + .iov_base = &len, + .iov_len = sizeof(len), + }, { + .iov_base = (void *)buf, + .iov_len = size, + }, + }; + size_t remaining; + ssize_t ret; + + remaining = iov_size(iov, 2) - s->send_index; + ret = iov_send(s->fd, iov, 2, s->send_index, remaining); + + if (ret == -1 && errno == EAGAIN) { + ret = 0; /* handled further down */ + } + if (ret == -1) { + s->send_index = 0; + return -errno; + } + if (ret < (ssize_t)remaining) { + s->send_index += ret; + net_stream_write_poll(s, true); + return 0; + } + s->send_index = 0; + return size; +} + +static void net_stream_send_completed(NetClientState *nc, ssize_t len) +{ + NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); + + if (!s->read_poll) { + net_stream_read_poll(s, true); + } +} + +static void net_stream_rs_finalize(SocketReadState *rs) +{ + NetStreamState *s = container_of(rs, NetStreamState, rs); + + if (qemu_send_packet_async(&s->nc, rs->buf, + rs->packet_len, + 
net_stream_send_completed) == 0) { + net_stream_read_poll(s, false); + } +} + +static void net_stream_send(void *opaque) +{ + NetStreamState *s = opaque; + int size; + int ret; + uint8_t buf1[NET_BUFSIZE]; + const uint8_t *buf; + + size = recv(s->fd, buf1, sizeof(buf1), 0); + if (size < 0) { + if (errno != EWOULDBLOCK) { + goto eoc; + } + } else if (size == 0) { + /* end of connection */ + eoc: + net_stream_read_poll(s, false); + net_stream_write_poll(s, false); + if (s->listen_fd != -1) { + qemu_set_fd_handler(s->listen_fd, net_stream_accept, NULL, s); + } + closesocket(s->fd); + + s->fd = -1; + net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); + s->nc.link_down = true; + qemu_set_info_str(&s->nc, ""); + + return; + } + buf = buf1; + + ret = net_fill_rstate(&s->rs, buf, size); + + if (ret == -1) { + goto eoc; + } +} + +static void net_stream_cleanup(NetClientState *nc) +{ + NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); + if (s->fd != -1) { + net_stream_read_poll(s, false); + net_stream_write_poll(s, false); + close(s->fd); + s->fd = -1; + } + if (s->listen_fd != -1) { + qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL); + closesocket(s->listen_fd); + s->listen_fd = -1; + } +} + +static void net_stream_connect(void *opaque) +{ + NetStreamState *s = opaque; + net_stream_read_poll(s, true); +} + +static NetClientInfo net_stream_info = { + .type = NET_CLIENT_DRIVER_STREAM, + .size = sizeof(NetStreamState), + .receive = net_stream_receive, + .cleanup = net_stream_cleanup, +}; + +static NetStreamState *net_stream_fd_init(NetClientState *peer, + const char *model, + const char *name, + int fd, int is_connected) +{ + NetClientState *nc; + NetStreamState *s; + + nc = qemu_new_net_client(&net_stream_info, peer, model, name); + + qemu_set_info_str(nc, "fd=%d", fd); + + s = DO_UPCAST(NetStreamState, nc, nc); + + s->fd = fd; + s->listen_fd = -1; + net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); + + /* Disable Nagle algorithm on TCP sockets to 
reduce latency */ + socket_set_nodelay(fd); + + if (is_connected) { + net_stream_connect(s); + } else { + qemu_set_fd_handler(s->fd, NULL, net_stream_connect, s); + } + return s; +} + +static void net_stream_accept(void *opaque) +{ + NetStreamState *s = opaque; + struct sockaddr_in saddr; + socklen_t len; + int fd; + + for (;;) { + len = sizeof(saddr); + fd = qemu_accept(s->listen_fd, (struct sockaddr *)&saddr, &len); + if (fd < 0 && errno != EINTR) { + return; + } else if (fd >= 0) { + qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL); + break; + } + } + + s->fd = fd; + s->nc.link_down = false; + net_stream_connect(s); + qemu_set_info_str(&s->nc, "connection from %s:%d", + inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); +} + +static int net_stream_server_init(NetClientState *peer, + const char *model, + const char *name, + SocketAddress *addr, + Error **errp) +{ + NetClientState *nc; + NetStreamState *s; + int fd, ret; + + switch (addr->type) { + case SOCKET_ADDRESS_TYPE_INET: { + struct sockaddr_in saddr_in; + + if (convert_host_port(&saddr_in, addr->u.inet.host, addr->u.inet.port, + errp) < 0) { + return -1; + } + + fd = qemu_socket(PF_INET, SOCK_STREAM, 0); + if (fd < 0) { + error_setg_errno(errp, errno, "can't create stream socket"); + return -1; + } + qemu_socket_set_nonblock(fd); + + socket_set_fast_reuse(fd); + + ret = bind(fd, (struct sockaddr *)&saddr_in, sizeof(saddr_in)); + if (ret < 0) { + error_setg_errno(errp, errno, "can't bind ip=%s to socket", + inet_ntoa(saddr_in.sin_addr)); + closesocket(fd); + return -1; + } + break; + } + case SOCKET_ADDRESS_TYPE_FD: + fd = monitor_fd_param(monitor_cur(), addr->u.fd.str, errp); + if (fd == -1) { + return -1; + } + ret = qemu_socket_try_set_nonblock(fd); + if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", + name, fd); + return -1; + } + break; + default: + error_setg(errp, "only support inet or fd type"); + return -1; + } + + ret = listen(fd, 0); + if (ret < 0) { + 
error_setg_errno(errp, errno, "can't listen on socket"); + closesocket(fd); + return -1; + } + + nc = qemu_new_net_client(&net_stream_info, peer, model, name); + s = DO_UPCAST(NetStreamState, nc, nc); + s->fd = -1; + s->listen_fd = fd; + s->nc.link_down = true; + net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); + + qemu_set_fd_handler(s->listen_fd, net_stream_accept, NULL, s); + return 0; +} + +static int net_stream_client_init(NetClientState *peer, + const char *model, + const char *name, + SocketAddress *addr, + Error **errp) +{ + NetStreamState *s; + struct sockaddr_in saddr_in; + int fd, connected, ret; + + switch (addr->type) { + case SOCKET_ADDRESS_TYPE_INET: + if (convert_host_port(&saddr_in, addr->u.inet.host, addr->u.inet.port, + errp) < 0) { + return -1; + } + + fd = qemu_socket(PF_INET, SOCK_STREAM, 0); + if (fd < 0) { + error_setg_errno(errp, errno, "can't create stream socket"); + return -1; + } + qemu_socket_set_nonblock(fd); + + connected = 0; + for (;;) { + ret = connect(fd, (struct sockaddr *)&saddr_in, sizeof(saddr_in)); + if (ret < 0) { + if (errno == EINTR || errno == EWOULDBLOCK) { + /* continue */ + } else if (errno == EINPROGRESS || + errno == EALREADY || + errno == EINVAL) { + break; + } else { + error_setg_errno(errp, errno, "can't connect socket"); + closesocket(fd); + return -1; + } + } else { + connected = 1; + break; + } + } + break; + case SOCKET_ADDRESS_TYPE_FD: + fd = monitor_fd_param(monitor_cur(), addr->u.fd.str, errp); + if (fd == -1) { + return -1; + } + ret = qemu_socket_try_set_nonblock(fd); + if (ret < 0) { + error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", + name, fd); + return -1; + } + connected = 1; + break; + default: + error_setg(errp, "only support inet or fd type"); + return -1; + } + + s = net_stream_fd_init(peer, model, name, fd, connected); + + switch (addr->type) { + case SOCKET_ADDRESS_TYPE_INET: + qemu_set_info_str(&s->nc, "connect to %s:%d", + inet_ntoa(saddr_in.sin_addr), + 
ntohs(saddr_in.sin_port)); + break; + case SOCKET_ADDRESS_TYPE_FD: + qemu_set_info_str(&s->nc, "connect to fd %d", fd); + break; + default: + g_assert_not_reached(); + } + + return 0; +} + +int net_init_stream(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) +{ + const NetdevStreamOptions *sock; + + assert(netdev->type == NET_CLIENT_DRIVER_STREAM); + sock = &netdev->u.stream; + + if (!sock->has_server || !sock->server) { + return net_stream_client_init(peer, "stream", name, sock->addr, errp); + } + return net_stream_server_init(peer, "stream", name, sock->addr, errp); +} diff --git a/qapi/net.json b/qapi/net.json index 926ecc8cca..d476d33c72 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -7,6 +7,7 @@ ## { 'include': 'common.json' } +{ 'include': 'sockets.json' } ## # @set_link: @@ -576,6 +577,60 @@ '*isolated': 'bool' }, 'if': 'CONFIG_VMNET' } +## +# @NetdevStreamOptions: +# +# Configuration info for stream socket netdev +# +# @addr: socket address to listen on (server=true) +# or connect to (server=false) +# @server: create server socket (default: false) +# +# Only SocketAddress types 'inet' and 'fd' are supported. +# +# Since: 7.2 +## +{ 'struct': 'NetdevStreamOptions', + 'data': { + 'addr': 'SocketAddress', + '*server': 'bool' } } + +## +# @NetdevDgramOptions: +# +# Configuration info for datagram socket netdev. +# +# @remote: remote address +# @local: local address +# +# Only SocketAddress types 'inet' and 'fd' are supported. +# +# If remote address is present and it's a multicast address, local address +# is optional. Otherwise local address is required and remote address is +# optional. +# +# .. table:: Valid parameters combination table +# :widths: auto +# +# ============= ======== ===== +# remote local okay? 
+# ============= ======== ===== +# absent absent no +# absent not fd no +# absent fd yes +# multicast absent yes +# multicast present yes +# not multicast absent no +# not multicast present yes +# ============= ======== ===== +# +# Since: 7.2 +## +{ 'struct': 'NetdevDgramOptions', + 'data': { + '*local': 'SocketAddress', + '*remote': 'SocketAddress' } } + ## # @NetClientDriver: # @@ -587,10 +642,13 @@ # @vmnet-host since 7.1 # @vmnet-shared since 7.1 # @vmnet-bridged since 7.1 +# @stream since 7.2 +# @dgram since 7.2 ## { 'enum': 'NetClientDriver', - 'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde', - 'bridge', 'hubport', 'netmap', 'vhost-user', 'vhost-vdpa', + 'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'stream', + 'dgram', 'vde', 'bridge', 'hubport', 'netmap', 'vhost-user', + 'vhost-vdpa', { 'name': 'vmnet-host', 'if': 'CONFIG_VMNET' }, { 'name': 'vmnet-shared', 'if': 'CONFIG_VMNET' }, { 'name': 'vmnet-bridged', 'if': 'CONFIG_VMNET' }] } @@ -610,6 +668,8 @@ # 'vmnet-host' - since 7.1 # 'vmnet-shared' - since 7.1 # 'vmnet-bridged' - since 7.1 +# 'stream' since 7.2 +# 'dgram' since 7.2 ## { 'union': 'Netdev', 'base': { 'id': 'str', 'type': 'NetClientDriver' }, @@ -620,6 +680,8 @@ 'tap': 'NetdevTapOptions', 'l2tpv3': 'NetdevL2TPv3Options', 'socket': 'NetdevSocketOptions', + 'stream': 'NetdevStreamOptions', + 'dgram': 'NetdevDgramOptions', 'vde': 'NetdevVdeOptions', 'bridge': 'NetdevBridgeOptions', 'hubport': 'NetdevHubPortOptions', diff --git a/qemu-options.hx b/qemu-options.hx index 876a70aa63..01f2b92d6f 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2772,6 +2772,18 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" " configure a network backend to connect to another network\n" " using an UDP tunnel\n" + "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port\n" + "-netdev 
stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n" + " configure a network backend to connect to another network\n" + " using a socket connection in stream mode.\n" + "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n" + "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=fd,local.str=file-descriptor]\n" + " configure a network backend to connect to a multicast maddr and port\n" + " use ``local.host=addr`` to specify the host address to send packets from\n" + "-netdev dgram,id=str,local.type=inet,local.host=addr,local.port=port[,remote.type=inet,remote.host=addr,remote.port=port]\n" + "-netdev dgram,id=str,local.type=fd,local.str=file-descriptor\n" + " configure a network backend to connect to another network\n" + " using an UDP tunnel\n" #ifdef CONFIG_VDE "-netdev vde,id=str[,sock=socketpath][,port=n][,group=groupname][,mode=octalmode]\n" " configure a network backend to connect to port 'n' of a vde switch\n" From daf188ff04ea86fedf447ce366af3d1025020909 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 21 Oct 2022 11:09:12 +0200 Subject: [PATCH 17/26] net: socket: Don't ignore EINVAL on netdev socket connection Other errors are treated as failure by net_socket_connect_init(), but if connect() returns EINVAL, we'll fail silently. Remove the related exception. Signed-off-by: Stefano Brivio Signed-off-by: Laurent Vivier Reviewed-by: David Gibson Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- net/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/socket.c b/net/socket.c index ade1ecf38b..4944bb70d5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -577,8 +577,7 @@ static int net_socket_connect_init(NetClientState *peer, if (errno == EINTR || errno == EWOULDBLOCK) { /* continue */ } else if (errno == EINPROGRESS || - errno == EALREADY || - errno == EINVAL) { + errno == EALREADY) { break; } else { error_setg_errno(errp, errno, "can't connect socket"); From 80d3e4779d6c5fee0402b0b48de15c3c812845a4 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 21 Oct 2022 11:09:13 +0200 Subject: [PATCH 18/26] net: stream: Don't ignore EINVAL on netdev socket connection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Other errors are treated as failure by net_stream_client_init(), but if connect() returns EINVAL, we'll fail silently. Remove the related exception. Signed-off-by: Stefano Brivio [lvivier: applied to net/stream.c] Signed-off-by: Laurent Vivier Reviewed-by: Daniel P. Berrangé Reviewed-by: David Gibson Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- net/stream.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/stream.c b/net/stream.c index 0a7e84749a..e4388fe7e4 100644 --- a/net/stream.c +++ b/net/stream.c @@ -360,8 +360,7 @@ static int net_stream_client_init(NetClientState *peer, if (errno == EINTR || errno == EWOULDBLOCK) { /* continue */ } else if (errno == EINPROGRESS || - errno == EALREADY || - errno == EINVAL) { + errno == EALREADY) { break; } else { error_setg_errno(errp, errno, "can't connect socket"); From 13c6be96618c4435ba412c098bfe35a76355bc45 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:14 +0200 Subject: [PATCH 19/26] net: stream: add unix socket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Laurent Vivier Reviewed-by: Stefano Brivio Acked-by: Michael S. Tsirkin Acked-by: Markus Armbruster (QAPI schema) Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Jason Wang --- net/stream.c | 107 +++++++++++++++++++++++++++++++++++++++++++++--- qapi/net.json | 2 +- qemu-options.hx | 1 + 3 files changed, 104 insertions(+), 6 deletions(-) diff --git a/net/stream.c b/net/stream.c index e4388fe7e4..884f473018 100644 --- a/net/stream.c +++ b/net/stream.c @@ -235,7 +235,7 @@ static NetStreamState *net_stream_fd_init(NetClientState *peer, static void net_stream_accept(void *opaque) { NetStreamState *s = opaque; - struct sockaddr_in saddr; + struct sockaddr_storage saddr; socklen_t len; int fd; @@ -253,8 +253,26 @@ static void net_stream_accept(void *opaque) s->fd = fd; s->nc.link_down = false; net_stream_connect(s); - qemu_set_info_str(&s->nc, "connection from %s:%d", - inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + switch (saddr.ss_family) { + case AF_INET: { + struct sockaddr_in *saddr_in = (struct sockaddr_in *)&saddr; + + qemu_set_info_str(&s->nc, "connection from %s:%d", + inet_ntoa(saddr_in->sin_addr), + ntohs(saddr_in->sin_port)); + break; + } + case 
AF_UNIX: { + struct sockaddr_un saddr_un; + + len = sizeof(saddr_un); + getsockname(s->listen_fd, (struct sockaddr *)&saddr_un, &len); + qemu_set_info_str(&s->nc, "connect from %s", saddr_un.sun_path); + break; + } + default: + g_assert_not_reached(); + } } static int net_stream_server_init(NetClientState *peer, @@ -294,6 +312,43 @@ static int net_stream_server_init(NetClientState *peer, } break; } + case SOCKET_ADDRESS_TYPE_UNIX: { + struct sockaddr_un saddr_un; + + ret = unlink(addr->u.q_unix.path); + if (ret < 0 && errno != ENOENT) { + error_setg_errno(errp, errno, "failed to unlink socket %s", + addr->u.q_unix.path); + return -1; + } + + saddr_un.sun_family = PF_UNIX; + ret = snprintf(saddr_un.sun_path, sizeof(saddr_un.sun_path), "%s", + addr->u.q_unix.path); + if (ret < 0 || ret >= sizeof(saddr_un.sun_path)) { + error_setg(errp, "UNIX socket path '%s' is too long", + addr->u.q_unix.path); + error_append_hint(errp, "Path must be less than %zu bytes\n", + sizeof(saddr_un.sun_path)); + return -1; + } + + fd = qemu_socket(PF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + error_setg_errno(errp, errno, "can't create stream socket"); + return -1; + } + qemu_socket_set_nonblock(fd); + + ret = bind(fd, (struct sockaddr *)&saddr_un, sizeof(saddr_un)); + if (ret < 0) { + error_setg_errno(errp, errno, "can't create socket with path: %s", + saddr_un.sun_path); + closesocket(fd); + return -1; + } + break; + } case SOCKET_ADDRESS_TYPE_FD: fd = monitor_fd_param(monitor_cur(), addr->u.fd.str, errp); if (fd == -1) { @@ -337,6 +392,7 @@ static int net_stream_client_init(NetClientState *peer, { NetStreamState *s; struct sockaddr_in saddr_in; + struct sockaddr_un saddr_un; int fd, connected, ret; switch (addr->type) { @@ -373,6 +429,45 @@ static int net_stream_client_init(NetClientState *peer, } } break; + case SOCKET_ADDRESS_TYPE_UNIX: + saddr_un.sun_family = PF_UNIX; + ret = snprintf(saddr_un.sun_path, sizeof(saddr_un.sun_path), "%s", + addr->u.q_unix.path); + if (ret < 0 || ret >= 
sizeof(saddr_un.sun_path)) { + error_setg(errp, "UNIX socket path '%s' is too long", + addr->u.q_unix.path); + error_append_hint(errp, "Path must be less than %zu bytes\n", + sizeof(saddr_un.sun_path)); + return -1; + } + + fd = qemu_socket(PF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + error_setg_errno(errp, errno, "can't create stream socket"); + return -1; + } + qemu_socket_set_nonblock(fd); + + connected = 0; + for (;;) { + ret = connect(fd, (struct sockaddr *)&saddr_un, sizeof(saddr_un)); + if (ret < 0) { + if (errno == EINTR || errno == EWOULDBLOCK) { + /* continue */ + } else if (errno == EAGAIN || + errno == EALREADY) { + break; + } else { + error_setg_errno(errp, errno, "can't connect socket"); + closesocket(fd); + return -1; + } + } else { + connected = 1; + break; + } + } + break; case SOCKET_ADDRESS_TYPE_FD: fd = monitor_fd_param(monitor_cur(), addr->u.fd.str, errp); if (fd == -1) { @@ -387,7 +482,7 @@ static int net_stream_client_init(NetClientState *peer, connected = 1; break; default: - error_setg(errp, "only support inet or fd type"); + error_setg(errp, "only support inet, unix or fd type"); return -1; } @@ -399,13 +494,15 @@ static int net_stream_client_init(NetClientState *peer, inet_ntoa(saddr_in.sin_addr), ntohs(saddr_in.sin_port)); break; + case SOCKET_ADDRESS_TYPE_UNIX: + qemu_set_info_str(&s->nc, " connect to %s", saddr_un.sun_path); + break; case SOCKET_ADDRESS_TYPE_FD: qemu_set_info_str(&s->nc, "connect to fd %d", fd); break; default: g_assert_not_reached(); } - return 0; } diff --git a/qapi/net.json b/qapi/net.json index d476d33c72..b4768ce62b 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -586,7 +586,7 @@ # or connect to (server=false) # @server: create server socket (default: false) # -# Only SocketAddress types 'inet' and 'fd' are supported. +# Only SocketAddress types 'unix', 'inet' and 'fd' are supported. 
# # Since: 7.2 ## diff --git a/qemu-options.hx b/qemu-options.hx index 01f2b92d6f..858f3dc738 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2773,6 +2773,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, " configure a network backend to connect to another network\n" " using an UDP tunnel\n" "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port\n" + "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path\n" "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n" " configure a network backend to connect to another network\n" " using a socket connection in stream mode.\n" From 7c1f0c33cc9d7436172da0882f088b3cfc8a0733 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:15 +0200 Subject: [PATCH 20/26] net: dgram: make dgram_dst generic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dgram_dst is a sockaddr_in structure. To be able to use it with unix socket, use a pointer to a generic sockaddr structure. Rename it dest_addr, and store socket length in dest_len. Signed-off-by: Laurent Vivier Reviewed-by: Stefano Brivio Acked-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Jason Wang --- net/dgram.c | 82 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 29 deletions(-) diff --git a/net/dgram.c b/net/dgram.c index 5339585b82..e20be9ca79 100644 --- a/net/dgram.c +++ b/net/dgram.c @@ -40,9 +40,11 @@ typedef struct NetDgramState { NetClientState nc; int fd; SocketReadState rs; - struct sockaddr_in dgram_dst; /* contains destination iff connectionless */ bool read_poll; /* waiting to receive data? */ bool write_poll; /* waiting to transmit data? 
*/ + /* contains destination iff connectionless */ + struct sockaddr *dest_addr; + socklen_t dest_len; } NetDgramState; static void net_dgram_send(void *opaque); @@ -84,10 +86,8 @@ static ssize_t net_dgram_receive(NetClientState *nc, ssize_t ret; do { - if (s->dgram_dst.sin_family != AF_UNIX) { - ret = sendto(s->fd, buf, size, 0, - (struct sockaddr *)&s->dgram_dst, - sizeof(s->dgram_dst)); + if (s->dest_addr) { + ret = sendto(s->fd, buf, size, 0, s->dest_addr, s->dest_len); } else { ret = send(s->fd, buf, size, 0); } @@ -244,6 +244,9 @@ static void net_dgram_cleanup(NetClientState *nc) close(s->fd); s->fd = -1; } + g_free(s->dest_addr); + s->dest_addr = NULL; + s->dest_len = 0; } static NetClientInfo net_dgram_socket_info = { @@ -260,7 +263,7 @@ static NetDgramState *net_dgram_fd_init(NetClientState *peer, SocketAddress *mcast, Error **errp) { - struct sockaddr_in saddr; + struct sockaddr_in *saddr = NULL; int newfd; NetClientState *nc; NetDgramState *s; @@ -275,31 +278,32 @@ static NetDgramState *net_dgram_fd_init(NetClientState *peer, qapi_free_SocketAddress(sa); /* - * fd passed: multicast: "learn" dgram_dst address from bound address and + * fd passed: multicast: "learn" dest_addr address from bound address and * save it. 
Because this may be "shared" socket from a "master" process, * datagrams would be recv() by ONLY ONE process: we must "clone" this * dgram socket --jjo */ if (is_fd && mcast != NULL) { - if (convert_host_port(&saddr, mcast->u.inet.host, - mcast->u.inet.port, errp) < 0) { + saddr = g_new(struct sockaddr_in, 1); + + if (convert_host_port(saddr, mcast->u.inet.host, mcast->u.inet.port, + errp) < 0) { goto err; } /* must be bound */ - if (saddr.sin_addr.s_addr == 0) { + if (saddr->sin_addr.s_addr == 0) { error_setg(errp, "can't setup multicast destination address"); goto err; } /* clone dgram socket */ - newfd = net_dgram_mcast_create(&saddr, NULL, errp); + newfd = net_dgram_mcast_create(saddr, NULL, errp); if (newfd < 0) { goto err; } /* clone newfd to fd, close newfd */ dup2(newfd, fd); close(newfd); - } nc = qemu_new_net_client(&net_dgram_socket_info, peer, model, name); @@ -311,21 +315,20 @@ static NetDgramState *net_dgram_fd_init(NetClientState *peer, net_dgram_read_poll(s, true); /* mcast: save bound address as dst */ - if (is_fd && mcast != NULL) { - s->dgram_dst = saddr; + if (saddr) { + g_assert(s->dest_addr == NULL); + s->dest_addr = (struct sockaddr *)saddr; + s->dest_len = sizeof(*saddr); qemu_set_info_str(nc, "fd=%d (cloned mcast=%s:%d)", fd, - inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); + inet_ntoa(saddr->sin_addr), ntohs(saddr->sin_port)); } else { - if (sa_type == SOCKET_ADDRESS_TYPE_UNIX) { - s->dgram_dst.sin_family = AF_UNIX; - } - qemu_set_info_str(nc, "fd=%d %s", fd, SocketAddressType_str(sa_type)); } return s; err: + g_free(saddr); closesocket(fd); return NULL; } @@ -339,21 +342,24 @@ static int net_dgram_mcast_init(NetClientState *peer, { NetDgramState *s; int fd, ret; - struct sockaddr_in saddr; + struct sockaddr_in *saddr; if (remote->type != SOCKET_ADDRESS_TYPE_INET) { error_setg(errp, "multicast only support inet type"); return -1; } - if (convert_host_port(&saddr, remote->u.inet.host, remote->u.inet.port, + saddr = g_new(struct 
sockaddr_in, 1); + if (convert_host_port(saddr, remote->u.inet.host, remote->u.inet.port, errp) < 0) { + g_free(saddr); return -1; } if (!local) { - fd = net_dgram_mcast_create(&saddr, NULL, errp); + fd = net_dgram_mcast_create(saddr, NULL, errp); if (fd < 0) { + g_free(saddr); return -1; } } else { @@ -362,13 +368,15 @@ static int net_dgram_mcast_init(NetClientState *peer, struct in_addr localaddr; if (inet_aton(local->u.inet.host, &localaddr) == 0) { + g_free(saddr); error_setg(errp, "localaddr '%s' is not a valid IPv4 address", local->u.inet.host); return -1; } - fd = net_dgram_mcast_create(&saddr, &localaddr, errp); + fd = net_dgram_mcast_create(saddr, &localaddr, errp); if (fd < 0) { + g_free(saddr); return -1; } break; @@ -376,16 +384,19 @@ static int net_dgram_mcast_init(NetClientState *peer, case SOCKET_ADDRESS_TYPE_FD: fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp); if (fd == -1) { + g_free(saddr); return -1; } ret = qemu_socket_try_set_nonblock(fd); if (ret < 0) { + g_free(saddr); error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", name, fd); return -1; } break; default: + g_free(saddr); error_setg(errp, "only support inet or fd type for local"); return -1; } @@ -395,13 +406,17 @@ static int net_dgram_mcast_init(NetClientState *peer, local->type == SOCKET_ADDRESS_TYPE_FD, remote, errp); if (!s) { + g_free(saddr); return -1; } - s->dgram_dst = saddr; + g_assert(s->dest_addr == NULL); + s->dest_addr = (struct sockaddr *)saddr; + s->dest_len = sizeof(*saddr); + + qemu_set_info_str(&s->nc, "mcast=%s:%d", inet_ntoa(saddr->sin_addr), + ntohs(saddr->sin_port)); - qemu_set_info_str(&s->nc, "mcast=%s:%d", inet_ntoa(saddr.sin_addr), - ntohs(saddr.sin_port)); return 0; } @@ -412,9 +427,10 @@ int net_init_dgram(const Netdev *netdev, const char *name, { NetDgramState *s; int fd, ret; - struct sockaddr_in raddr_in; - struct sockaddr_in laddr_in; SocketAddress *remote, *local; + struct sockaddr *dest_addr; + struct sockaddr_in laddr_in, 
raddr_in; + socklen_t dest_len; assert(netdev->type == NET_CLIENT_DRIVER_DGRAM); @@ -491,6 +507,10 @@ int net_init_dgram(const Netdev *netdev, const char *name, return -1; } qemu_socket_set_nonblock(fd); + + dest_len = sizeof(raddr_in); + dest_addr = g_malloc(dest_len); + memcpy(dest_addr, &raddr_in, dest_len); break; case SOCKET_ADDRESS_TYPE_FD: fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp); @@ -503,6 +523,8 @@ int net_init_dgram(const Netdev *netdev, const char *name, name, fd); return -1; } + dest_addr = NULL; + dest_len = 0; break; default: error_setg(errp, "only support inet or fd type for local"); @@ -515,7 +537,9 @@ int net_init_dgram(const Netdev *netdev, const char *name, } if (remote) { - s->dgram_dst = raddr_in; + g_assert(s->dest_addr == NULL); + s->dest_addr = dest_addr; + s->dest_len = dest_len; } switch (local->type) { From 8ecc7f40bc76f2a565a74d1e5b2d7462cf050f1d Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:16 +0200 Subject: [PATCH 21/26] net: dgram: move mcast specific code from net_socket_fd_init_dgram() It is less complex to manage special cases directly in net_dgram_mcast_init() and net_dgram_udp_init(). Signed-off-by: Laurent Vivier Reviewed-by: Stefano Brivio Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- net/dgram.c | 143 ++++++++++++++++++++++++++++------------------------ 1 file changed, 76 insertions(+), 67 deletions(-) diff --git a/net/dgram.c b/net/dgram.c index e20be9ca79..e581cc62f3 100644 --- a/net/dgram.c +++ b/net/dgram.c @@ -259,52 +259,11 @@ static NetClientInfo net_dgram_socket_info = { static NetDgramState *net_dgram_fd_init(NetClientState *peer, const char *model, const char *name, - int fd, int is_fd, - SocketAddress *mcast, + int fd, Error **errp) { - struct sockaddr_in *saddr = NULL; - int newfd; NetClientState *nc; NetDgramState *s; - SocketAddress *sa; - SocketAddressType sa_type; - - sa = socket_local_address(fd, errp); - if (!sa) { - return NULL; - } - sa_type = sa->type; - qapi_free_SocketAddress(sa); - - /* - * fd passed: multicast: "learn" dest_addr address from bound address and - * save it. Because this may be "shared" socket from a "master" process, - * datagrams would be recv() by ONLY ONE process: we must "clone" this - * dgram socket --jjo - */ - - if (is_fd && mcast != NULL) { - saddr = g_new(struct sockaddr_in, 1); - - if (convert_host_port(saddr, mcast->u.inet.host, mcast->u.inet.port, - errp) < 0) { - goto err; - } - /* must be bound */ - if (saddr->sin_addr.s_addr == 0) { - error_setg(errp, "can't setup multicast destination address"); - goto err; - } - /* clone dgram socket */ - newfd = net_dgram_mcast_create(saddr, NULL, errp); - if (newfd < 0) { - goto err; - } - /* clone newfd to fd, close newfd */ - dup2(newfd, fd); - close(newfd); - } nc = qemu_new_net_client(&net_dgram_socket_info, peer, model, name); @@ -314,23 +273,7 @@ static NetDgramState *net_dgram_fd_init(NetClientState *peer, net_socket_rs_init(&s->rs, net_dgram_rs_finalize, false); net_dgram_read_poll(s, true); - /* mcast: save bound address as dst */ - if (saddr) { - g_assert(s->dest_addr == NULL); - s->dest_addr = (struct sockaddr *)saddr; - s->dest_len = sizeof(*saddr); - qemu_set_info_str(nc, "fd=%d (cloned mcast=%s:%d)", 
fd, - inet_ntoa(saddr->sin_addr), ntohs(saddr->sin_port)); - } else { - qemu_set_info_str(nc, "fd=%d %s", fd, SocketAddressType_str(sa_type)); - } - return s; - -err: - g_free(saddr); - closesocket(fd); - return NULL; } static int net_dgram_mcast_init(NetClientState *peer, @@ -381,7 +324,9 @@ static int net_dgram_mcast_init(NetClientState *peer, } break; } - case SOCKET_ADDRESS_TYPE_FD: + case SOCKET_ADDRESS_TYPE_FD: { + int newfd; + fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp); if (fd == -1) { g_free(saddr); @@ -394,7 +339,42 @@ static int net_dgram_mcast_init(NetClientState *peer, name, fd); return -1; } + + /* + * fd passed: multicast: "learn" dest_addr address from bound + * address and save it. Because this may be "shared" socket from a + * "master" process, datagrams would be recv() by ONLY ONE process: + * we must "clone" this dgram socket --jjo + */ + + saddr = g_new(struct sockaddr_in, 1); + + if (convert_host_port(saddr, local->u.inet.host, local->u.inet.port, + errp) < 0) { + g_free(saddr); + closesocket(fd); + return -1; + } + + /* must be bound */ + if (saddr->sin_addr.s_addr == 0) { + error_setg(errp, "can't setup multicast destination address"); + g_free(saddr); + closesocket(fd); + return -1; + } + /* clone dgram socket */ + newfd = net_dgram_mcast_create(saddr, NULL, errp); + if (newfd < 0) { + g_free(saddr); + closesocket(fd); + return -1; + } + /* clone newfd to fd, close newfd */ + dup2(newfd, fd); + close(newfd); break; + } default: g_free(saddr); error_setg(errp, "only support inet or fd type for local"); @@ -402,9 +382,7 @@ static int net_dgram_mcast_init(NetClientState *peer, } } - s = net_dgram_fd_init(peer, model, name, fd, - local->type == SOCKET_ADDRESS_TYPE_FD, - remote, errp); + s = net_dgram_fd_init(peer, model, name, fd, errp); if (!s) { g_free(saddr); return -1; @@ -414,8 +392,26 @@ static int net_dgram_mcast_init(NetClientState *peer, s->dest_addr = (struct sockaddr *)saddr; s->dest_len = sizeof(*saddr); - 
qemu_set_info_str(&s->nc, "mcast=%s:%d", inet_ntoa(saddr->sin_addr), - ntohs(saddr->sin_port)); + if (!local) { + qemu_set_info_str(&s->nc, "mcast=%s:%d", + inet_ntoa(saddr->sin_addr), + ntohs(saddr->sin_port)); + } else { + switch (local->type) { + case SOCKET_ADDRESS_TYPE_INET: + qemu_set_info_str(&s->nc, "mcast=%s:%d", + inet_ntoa(saddr->sin_addr), + ntohs(saddr->sin_port)); + break; + case SOCKET_ADDRESS_TYPE_FD: + qemu_set_info_str(&s->nc, "fd=%d (cloned mcast=%s:%d)", + fd, inet_ntoa(saddr->sin_addr), + ntohs(saddr->sin_port)); + break; + default: + g_assert_not_reached(); + } + } return 0; @@ -531,7 +527,7 @@ int net_init_dgram(const Netdev *netdev, const char *name, return -1; } - s = net_dgram_fd_init(peer, "dgram", name, fd, 0, NULL, errp); + s = net_dgram_fd_init(peer, "dgram", name, fd, errp); if (!s) { return -1; } @@ -550,9 +546,22 @@ int net_init_dgram(const Netdev *netdev, const char *name, inet_ntoa(raddr_in.sin_addr), ntohs(raddr_in.sin_port)); break; - case SOCKET_ADDRESS_TYPE_FD: - qemu_set_info_str(&s->nc, "fd=%d", fd); + case SOCKET_ADDRESS_TYPE_FD: { + SocketAddress *sa; + SocketAddressType sa_type; + + sa = socket_local_address(fd, errp); + if (sa) { + sa_type = sa->type; + qapi_free_SocketAddress(sa); + + qemu_set_info_str(&s->nc, "fd=%d %s", fd, + SocketAddressType_str(sa_type)); + } else { + qemu_set_info_str(&s->nc, "fd=%d", fd); + } break; + } default: g_assert_not_reached(); } From 784e7a2531040824f88f33494be256a9e331e219 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:17 +0200 Subject: [PATCH 22/26] net: dgram: add unix socket Signed-off-by: Laurent Vivier Reviewed-by: Stefano Brivio Reviewed-by: David Gibson Acked-by: Michael S. 
Tsirkin Acked-by: Markus Armbruster (QAPI schema) Signed-off-by: Jason Wang --- net/dgram.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++- qapi/net.json | 2 +- qemu-options.hx | 1 + 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/net/dgram.c b/net/dgram.c index e581cc62f3..9f7bf38376 100644 --- a/net/dgram.c +++ b/net/dgram.c @@ -426,6 +426,7 @@ int net_init_dgram(const Netdev *netdev, const char *name, SocketAddress *remote, *local; struct sockaddr *dest_addr; struct sockaddr_in laddr_in, raddr_in; + struct sockaddr_un laddr_un, raddr_un; socklen_t dest_len; assert(netdev->type == NET_CLIENT_DRIVER_DGRAM); @@ -465,7 +466,8 @@ int net_init_dgram(const Netdev *netdev, const char *name, } } else { if (local->type != SOCKET_ADDRESS_TYPE_FD) { - error_setg(errp, "type=inet requires remote parameter"); + error_setg(errp, + "type=inet or type=unix requires remote parameter"); return -1; } } @@ -508,6 +510,53 @@ int net_init_dgram(const Netdev *netdev, const char *name, dest_addr = g_malloc(dest_len); memcpy(dest_addr, &raddr_in, dest_len); break; + case SOCKET_ADDRESS_TYPE_UNIX: + ret = unlink(local->u.q_unix.path); + if (ret < 0 && errno != ENOENT) { + error_setg_errno(errp, errno, "failed to unlink socket %s", + local->u.q_unix.path); + return -1; + } + + laddr_un.sun_family = PF_UNIX; + ret = snprintf(laddr_un.sun_path, sizeof(laddr_un.sun_path), "%s", + local->u.q_unix.path); + if (ret < 0 || ret >= sizeof(laddr_un.sun_path)) { + error_setg(errp, "UNIX socket path '%s' is too long", + local->u.q_unix.path); + error_append_hint(errp, "Path must be less than %zu bytes\n", + sizeof(laddr_un.sun_path)); + } + + raddr_un.sun_family = PF_UNIX; + ret = snprintf(raddr_un.sun_path, sizeof(raddr_un.sun_path), "%s", + remote->u.q_unix.path); + if (ret < 0 || ret >= sizeof(raddr_un.sun_path)) { + error_setg(errp, "UNIX socket path '%s' is too long", + remote->u.q_unix.path); + error_append_hint(errp, "Path must be less than %zu bytes\n", + 
sizeof(raddr_un.sun_path)); + } + + fd = qemu_socket(PF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) { + error_setg_errno(errp, errno, "can't create datagram socket"); + return -1; + } + + ret = bind(fd, (struct sockaddr *)&laddr_un, sizeof(laddr_un)); + if (ret < 0) { + error_setg_errno(errp, errno, "can't bind unix=%s to socket", + laddr_un.sun_path); + closesocket(fd); + return -1; + } + qemu_socket_set_nonblock(fd); + + dest_len = sizeof(raddr_un); + dest_addr = g_malloc(dest_len); + memcpy(dest_addr, &raddr_un, dest_len); + break; case SOCKET_ADDRESS_TYPE_FD: fd = monitor_fd_param(monitor_cur(), local->u.fd.str, errp); if (fd == -1) { @@ -546,6 +595,10 @@ int net_init_dgram(const Netdev *netdev, const char *name, inet_ntoa(raddr_in.sin_addr), ntohs(raddr_in.sin_port)); break; + case SOCKET_ADDRESS_TYPE_UNIX: + qemu_set_info_str(&s->nc, "udp=%s:%s", + laddr_un.sun_path, raddr_un.sun_path); + break; case SOCKET_ADDRESS_TYPE_FD: { SocketAddress *sa; SocketAddressType sa_type; diff --git a/qapi/net.json b/qapi/net.json index b4768ce62b..cbc8d77c97 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -603,7 +603,7 @@ # @remote: remote address # @local: local address # -# Only SocketAddress types 'inet' and 'fd' are supported. +# Only SocketAddress types 'unix', 'inet' and 'fd' are supported. # # If remote address is present and it's a multicast address, local address # is optional. 
Otherwise local address is required and remote address is diff --git a/qemu-options.hx b/qemu-options.hx index 858f3dc738..e76142bfb6 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2782,6 +2782,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, " configure a network backend to connect to a multicast maddr and port\n" " use ``local.host=addr`` to specify the host address to send packets from\n" "-netdev dgram,id=str,local.type=inet,local.host=addr,local.port=port[,remote.type=inet,remote.host=addr,remote.port=port]\n" + "-netdev dgram,id=str,local.type=unix,local.path=path[,remote.type=unix,remote.path=path]\n" "-netdev dgram,id=str,local.type=fd,local.str=file-descriptor\n" " configure a network backend to connect to another network\n" " using an UDP tunnel\n" From 18bf1c94565b1b594873aaea9dfd47c83abd8543 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:18 +0200 Subject: [PATCH 23/26] qemu-sockets: move and rename SocketAddress_to_str() Rename SocketAddress_to_str() to socket_uri() and move it to util/qemu-sockets.c close to socket_parse(). socket_uri() generates a string from a SocketAddress while socket_parse() generates a SocketAddress from a string. Signed-off-by: Laurent Vivier Reviewed-by: David Gibson Reviewed-by: Dr. David Alan Gilbert Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- include/qemu/sockets.h | 2 +- monitor/hmp-cmds.c | 23 +---------------------- util/qemu-sockets.c | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index db4bedb6fa..214058d8e3 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -58,6 +58,7 @@ NetworkAddressFamily inet_netfamily(int family); int unix_listen(const char *path, Error **errp); int unix_connect(const char *path, Error **errp); +char *socket_uri(SocketAddress *addr); SocketAddress *socket_parse(const char *str, Error **errp); int socket_connect(SocketAddress *addr, Error **errp); int socket_listen(SocketAddress *addr, int num, Error **errp); @@ -141,5 +142,4 @@ SocketAddress *socket_address_flatten(SocketAddressLegacy *addr); * Return 0 on success. */ int socket_address_parse_named_fd(SocketAddress *addr, Error **errp); - #endif /* QEMU_SOCKETS_H */ diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index bab86c5537..01b789a79e 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -199,27 +199,6 @@ void hmp_info_mice(Monitor *mon, const QDict *qdict) qapi_free_MouseInfoList(mice_list); } -static char *SocketAddress_to_str(SocketAddress *addr) -{ - switch (addr->type) { - case SOCKET_ADDRESS_TYPE_INET: - return g_strdup_printf("tcp:%s:%s", - addr->u.inet.host, - addr->u.inet.port); - case SOCKET_ADDRESS_TYPE_UNIX: - return g_strdup_printf("unix:%s", - addr->u.q_unix.path); - case SOCKET_ADDRESS_TYPE_FD: - return g_strdup_printf("fd:%s", addr->u.fd.str); - case SOCKET_ADDRESS_TYPE_VSOCK: - return g_strdup_printf("tcp:%s:%s", - addr->u.vsock.cid, - addr->u.vsock.port); - default: - return g_strdup("unknown address type"); - } -} - void hmp_info_migrate(Monitor *mon, const QDict *qdict) { MigrationInfo *info; @@ -382,7 +361,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) monitor_printf(mon, "socket address: [\n"); for (addr = info->socket_address; 
addr; addr = addr->next) { - char *s = SocketAddress_to_str(addr->value); + char *s = socket_uri(addr->value); monitor_printf(mon, "\t%s\n", s); g_free(s); } diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 83f4bd6fd2..9f6f655fd5 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -1077,6 +1077,26 @@ int unix_connect(const char *path, Error **errp) return sock; } +char *socket_uri(SocketAddress *addr) +{ + switch (addr->type) { + case SOCKET_ADDRESS_TYPE_INET: + return g_strdup_printf("tcp:%s:%s", + addr->u.inet.host, + addr->u.inet.port); + case SOCKET_ADDRESS_TYPE_UNIX: + return g_strdup_printf("unix:%s", + addr->u.q_unix.path); + case SOCKET_ADDRESS_TYPE_FD: + return g_strdup_printf("fd:%s", addr->u.fd.str); + case SOCKET_ADDRESS_TYPE_VSOCK: + return g_strdup_printf("tcp:%s:%s", + addr->u.vsock.cid, + addr->u.vsock.port); + default: + return g_strdup("unknown address type"); + } +} SocketAddress *socket_parse(const char *str, Error **errp) { From 7651b3211904eda3207d50f31c7f9acc8c375cec Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:19 +0200 Subject: [PATCH 24/26] qemu-sockets: update socket_uri() and socket_parse() to be consistent To be consistent with socket_uri(), add 'tcp:' prefix for inet type in socket_parse(); by default socket_parse() uses tcp when no prefix is provided (format is host:port). In socket_uri(), use 'vsock:' prefix for vsock type rather than 'tcp:' because it makes a vsock address look like an inet address with CID misinterpreted as host. Goes back to commit 9aca82ba31 "migration: Create socket-address parameter" Signed-off-by: Laurent Vivier Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Markus Armbruster Reviewed-by: David Gibson Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- util/qemu-sockets.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 9f6f655fd5..a9926af714 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -1090,7 +1090,7 @@ char *socket_uri(SocketAddress *addr) case SOCKET_ADDRESS_TYPE_FD: return g_strdup_printf("fd:%s", addr->u.fd.str); case SOCKET_ADDRESS_TYPE_VSOCK: - return g_strdup_printf("tcp:%s:%s", + return g_strdup_printf("vsock:%s:%s", addr->u.vsock.cid, addr->u.vsock.port); default: @@ -1124,6 +1124,11 @@ SocketAddress *socket_parse(const char *str, Error **errp) if (vsock_parse(&addr->u.vsock, str + strlen("vsock:"), errp)) { goto fail; } + } else if (strstart(str, "tcp:", NULL)) { + addr->type = SOCKET_ADDRESS_TYPE_INET; + if (inet_parse(&addr->u.inet, str + strlen("tcp:"), errp)) { + goto fail; + } } else { addr->type = SOCKET_ADDRESS_TYPE_INET; if (inet_parse(&addr->u.inet, str, errp)) { From 1f9c890fa3b6a09b34aea915500f9f002b5d9d60 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:20 +0200 Subject: [PATCH 25/26] net: stream: move to QIO to enable additional parameters Use QIOChannel, QIOChannelSocket and QIONetListener. This allows net/stream to use all the available parameters provided by SocketAddress. Signed-off-by: Laurent Vivier Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- net/stream.c | 516 ++++++++++++++++++------------------------------ qemu-options.hx | 4 +- 2 files changed, 190 insertions(+), 330 deletions(-) diff --git a/net/stream.c b/net/stream.c index 884f473018..54c67e14d2 100644 --- a/net/stream.c +++ b/net/stream.c @@ -35,48 +35,36 @@ #include "qemu/iov.h" #include "qemu/main-loop.h" #include "qemu/cutils.h" +#include "io/channel.h" +#include "io/channel-socket.h" +#include "io/net-listener.h" typedef struct NetStreamState { NetClientState nc; - int listen_fd; - int fd; + QIOChannel *listen_ioc; + QIONetListener *listener; + QIOChannel *ioc; + guint ioc_read_tag; + guint ioc_write_tag; SocketReadState rs; unsigned int send_index; /* number of bytes sent*/ - bool read_poll; /* waiting to receive data? */ - bool write_poll; /* waiting to transmit data? */ } NetStreamState; -static void net_stream_send(void *opaque); -static void net_stream_accept(void *opaque); -static void net_stream_writable(void *opaque); +static void net_stream_listen(QIONetListener *listener, + QIOChannelSocket *cioc, + void *opaque); -static void net_stream_update_fd_handler(NetStreamState *s) +static gboolean net_stream_writable(QIOChannel *ioc, + GIOCondition condition, + gpointer data) { - qemu_set_fd_handler(s->fd, - s->read_poll ? net_stream_send : NULL, - s->write_poll ? 
net_stream_writable : NULL, - s); -} + NetStreamState *s = data; -static void net_stream_read_poll(NetStreamState *s, bool enable) -{ - s->read_poll = enable; - net_stream_update_fd_handler(s); -} - -static void net_stream_write_poll(NetStreamState *s, bool enable) -{ - s->write_poll = enable; - net_stream_update_fd_handler(s); -} - -static void net_stream_writable(void *opaque) -{ - NetStreamState *s = opaque; - - net_stream_write_poll(s, false); + s->ioc_write_tag = 0; qemu_flush_queued_packets(&s->nc); + + return G_SOURCE_REMOVE; } static ssize_t net_stream_receive(NetClientState *nc, const uint8_t *buf, @@ -93,13 +81,15 @@ static ssize_t net_stream_receive(NetClientState *nc, const uint8_t *buf, .iov_len = size, }, }; + struct iovec local_iov[2]; + unsigned int nlocal_iov; size_t remaining; ssize_t ret; remaining = iov_size(iov, 2) - s->send_index; - ret = iov_send(s->fd, iov, 2, s->send_index, remaining); - - if (ret == -1 && errno == EAGAIN) { + nlocal_iov = iov_copy(local_iov, 2, iov, 2, s->send_index, remaining); + ret = qio_channel_writev(s->ioc, local_iov, nlocal_iov, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { ret = 0; /* handled further down */ } if (ret == -1) { @@ -108,19 +98,25 @@ static ssize_t net_stream_receive(NetClientState *nc, const uint8_t *buf, } if (ret < (ssize_t)remaining) { s->send_index += ret; - net_stream_write_poll(s, true); + s->ioc_write_tag = qio_channel_add_watch(s->ioc, G_IO_OUT, + net_stream_writable, s, NULL); return 0; } s->send_index = 0; return size; } +static gboolean net_stream_send(QIOChannel *ioc, + GIOCondition condition, + gpointer data); + static void net_stream_send_completed(NetClientState *nc, ssize_t len) { NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); - if (!s->read_poll) { - net_stream_read_poll(s, true); + if (!s->ioc_read_tag) { + s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, + net_stream_send, s, NULL); } } @@ -131,19 +127,24 @@ static void net_stream_rs_finalize(SocketReadState *rs) if 
(qemu_send_packet_async(&s->nc, rs->buf, rs->packet_len, net_stream_send_completed) == 0) { - net_stream_read_poll(s, false); + if (s->ioc_read_tag) { + g_source_remove(s->ioc_read_tag); + s->ioc_read_tag = 0; + } } } -static void net_stream_send(void *opaque) +static gboolean net_stream_send(QIOChannel *ioc, + GIOCondition condition, + gpointer data) { - NetStreamState *s = opaque; + NetStreamState *s = data; int size; int ret; - uint8_t buf1[NET_BUFSIZE]; - const uint8_t *buf; + char buf1[NET_BUFSIZE]; + const char *buf; - size = recv(s->fd, buf1, sizeof(buf1), 0); + size = qio_channel_read(s->ioc, buf1, sizeof(buf1), NULL); if (size < 0) { if (errno != EWOULDBLOCK) { goto eoc; @@ -151,51 +152,63 @@ static void net_stream_send(void *opaque) } else if (size == 0) { /* end of connection */ eoc: - net_stream_read_poll(s, false); - net_stream_write_poll(s, false); - if (s->listen_fd != -1) { - qemu_set_fd_handler(s->listen_fd, net_stream_accept, NULL, s); + s->ioc_read_tag = 0; + if (s->ioc_write_tag) { + g_source_remove(s->ioc_write_tag); + s->ioc_write_tag = 0; } - closesocket(s->fd); + if (s->listener) { + qio_net_listener_set_client_func(s->listener, net_stream_listen, + s, NULL); + } + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; - s->fd = -1; net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); s->nc.link_down = true; qemu_set_info_str(&s->nc, ""); - return; + return G_SOURCE_REMOVE; } buf = buf1; - ret = net_fill_rstate(&s->rs, buf, size); + ret = net_fill_rstate(&s->rs, (const uint8_t *)buf, size); if (ret == -1) { goto eoc; } + + return G_SOURCE_CONTINUE; } static void net_stream_cleanup(NetClientState *nc) { NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); - if (s->fd != -1) { - net_stream_read_poll(s, false); - net_stream_write_poll(s, false); - close(s->fd); - s->fd = -1; + if (s->ioc) { + if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) { + if (s->ioc_read_tag) { + g_source_remove(s->ioc_read_tag); + s->ioc_read_tag = 0; + } + if 
(s->ioc_write_tag) { + g_source_remove(s->ioc_write_tag); + s->ioc_write_tag = 0; + } + } + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; } - if (s->listen_fd != -1) { - qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL); - closesocket(s->listen_fd); - s->listen_fd = -1; + if (s->listen_ioc) { + if (s->listener) { + qio_net_listener_disconnect(s->listener); + object_unref(OBJECT(s->listener)); + s->listener = NULL; + } + object_unref(OBJECT(s->listen_ioc)); + s->listen_ioc = NULL; } } -static void net_stream_connect(void *opaque) -{ - NetStreamState *s = opaque; - net_stream_read_poll(s, true); -} - static NetClientInfo net_stream_info = { .type = NET_CLIENT_DRIVER_STREAM, .size = sizeof(NetStreamState), @@ -203,76 +216,66 @@ static NetClientInfo net_stream_info = { .cleanup = net_stream_cleanup, }; -static NetStreamState *net_stream_fd_init(NetClientState *peer, - const char *model, - const char *name, - int fd, int is_connected) -{ - NetClientState *nc; - NetStreamState *s; - - nc = qemu_new_net_client(&net_stream_info, peer, model, name); - - qemu_set_info_str(nc, "fd=%d", fd); - - s = DO_UPCAST(NetStreamState, nc, nc); - - s->fd = fd; - s->listen_fd = -1; - net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); - - /* Disable Nagle algorithm on TCP sockets to reduce latency */ - socket_set_nodelay(fd); - - if (is_connected) { - net_stream_connect(s); - } else { - qemu_set_fd_handler(s->fd, NULL, net_stream_connect, s); - } - return s; -} - -static void net_stream_accept(void *opaque) +static void net_stream_listen(QIONetListener *listener, + QIOChannelSocket *cioc, + void *opaque) { NetStreamState *s = opaque; - struct sockaddr_storage saddr; - socklen_t len; - int fd; + SocketAddress *addr; + char *uri; - for (;;) { - len = sizeof(saddr); - fd = qemu_accept(s->listen_fd, (struct sockaddr *)&saddr, &len); - if (fd < 0 && errno != EINTR) { - return; - } else if (fd >= 0) { - qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL); - break; - } - } + 
object_ref(OBJECT(cioc)); - s->fd = fd; + qio_net_listener_set_client_func(s->listener, NULL, s, NULL); + + s->ioc = QIO_CHANNEL(cioc); + qio_channel_set_name(s->ioc, "stream-server"); s->nc.link_down = false; - net_stream_connect(s); - switch (saddr.ss_family) { - case AF_INET: { - struct sockaddr_in *saddr_in = (struct sockaddr_in *)&saddr; - qemu_set_info_str(&s->nc, "connection from %s:%d", - inet_ntoa(saddr_in->sin_addr), - ntohs(saddr_in->sin_port)); - break; - } - case AF_UNIX: { - struct sockaddr_un saddr_un; + s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send, + s, NULL); - len = sizeof(saddr_un); - getsockname(s->listen_fd, (struct sockaddr *)&saddr_un, &len); - qemu_set_info_str(&s->nc, "connect from %s", saddr_un.sun_path); - break; + if (cioc->localAddr.ss_family == AF_UNIX) { + addr = qio_channel_socket_get_local_address(cioc, NULL); + } else { + addr = qio_channel_socket_get_remote_address(cioc, NULL); } - default: - g_assert_not_reached(); + g_assert(addr != NULL); + uri = socket_uri(addr); + qemu_set_info_str(&s->nc, uri); + g_free(uri); + qapi_free_SocketAddress(addr); +} + +static void net_stream_server_listening(QIOTask *task, gpointer opaque) +{ + NetStreamState *s = opaque; + QIOChannelSocket *listen_sioc = QIO_CHANNEL_SOCKET(s->listen_ioc); + SocketAddress *addr; + int ret; + + if (listen_sioc->fd < 0) { + qemu_set_info_str(&s->nc, "connection error"); + return; } + + addr = qio_channel_socket_get_local_address(listen_sioc, NULL); + g_assert(addr != NULL); + ret = qemu_socket_try_set_nonblock(listen_sioc->fd); + if (addr->type == SOCKET_ADDRESS_TYPE_FD && ret < 0) { + qemu_set_info_str(&s->nc, "can't use file descriptor %s (errno %d)", + addr->u.fd.str, -ret); + return; + } + g_assert(ret == 0); + qapi_free_SocketAddress(addr); + + s->nc.link_down = true; + s->listener = qio_net_listener_new(); + + net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); + qio_net_listener_set_client_func(s->listener, 
net_stream_listen, s, NULL); + qio_net_listener_add(s->listener, listen_sioc); } static int net_stream_server_init(NetClientState *peer, @@ -283,105 +286,61 @@ static int net_stream_server_init(NetClientState *peer, { NetClientState *nc; NetStreamState *s; - int fd, ret; - - switch (addr->type) { - case SOCKET_ADDRESS_TYPE_INET: { - struct sockaddr_in saddr_in; - - if (convert_host_port(&saddr_in, addr->u.inet.host, addr->u.inet.port, - errp) < 0) { - return -1; - } - - fd = qemu_socket(PF_INET, SOCK_STREAM, 0); - if (fd < 0) { - error_setg_errno(errp, errno, "can't create stream socket"); - return -1; - } - qemu_socket_set_nonblock(fd); - - socket_set_fast_reuse(fd); - - ret = bind(fd, (struct sockaddr *)&saddr_in, sizeof(saddr_in)); - if (ret < 0) { - error_setg_errno(errp, errno, "can't bind ip=%s to socket", - inet_ntoa(saddr_in.sin_addr)); - closesocket(fd); - return -1; - } - break; - } - case SOCKET_ADDRESS_TYPE_UNIX: { - struct sockaddr_un saddr_un; - - ret = unlink(addr->u.q_unix.path); - if (ret < 0 && errno != ENOENT) { - error_setg_errno(errp, errno, "failed to unlink socket %s", - addr->u.q_unix.path); - return -1; - } - - saddr_un.sun_family = PF_UNIX; - ret = snprintf(saddr_un.sun_path, sizeof(saddr_un.sun_path), "%s", - addr->u.q_unix.path); - if (ret < 0 || ret >= sizeof(saddr_un.sun_path)) { - error_setg(errp, "UNIX socket path '%s' is too long", - addr->u.q_unix.path); - error_append_hint(errp, "Path must be less than %zu bytes\n", - sizeof(saddr_un.sun_path)); - return -1; - } - - fd = qemu_socket(PF_UNIX, SOCK_STREAM, 0); - if (fd < 0) { - error_setg_errno(errp, errno, "can't create stream socket"); - return -1; - } - qemu_socket_set_nonblock(fd); - - ret = bind(fd, (struct sockaddr *)&saddr_un, sizeof(saddr_un)); - if (ret < 0) { - error_setg_errno(errp, errno, "can't create socket with path: %s", - saddr_un.sun_path); - closesocket(fd); - return -1; - } - break; - } - case SOCKET_ADDRESS_TYPE_FD: - fd = monitor_fd_param(monitor_cur(), 
addr->u.fd.str, errp); - if (fd == -1) { - return -1; - } - ret = qemu_socket_try_set_nonblock(fd); - if (ret < 0) { - error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", - name, fd); - return -1; - } - break; - default: - error_setg(errp, "only support inet or fd type"); - return -1; - } - - ret = listen(fd, 0); - if (ret < 0) { - error_setg_errno(errp, errno, "can't listen on socket"); - closesocket(fd); - return -1; - } + QIOChannelSocket *listen_sioc = qio_channel_socket_new(); nc = qemu_new_net_client(&net_stream_info, peer, model, name); s = DO_UPCAST(NetStreamState, nc, nc); - s->fd = -1; - s->listen_fd = fd; - s->nc.link_down = true; + + s->listen_ioc = QIO_CHANNEL(listen_sioc); + qio_channel_socket_listen_async(listen_sioc, addr, 0, + net_stream_server_listening, s, + NULL, NULL); + + return 0; +} + +static void net_stream_client_connected(QIOTask *task, gpointer opaque) +{ + NetStreamState *s = opaque; + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(s->ioc); + SocketAddress *addr; + gchar *uri; + int ret; + + if (sioc->fd < 0) { + qemu_set_info_str(&s->nc, "connection error"); + goto error; + } + + addr = qio_channel_socket_get_remote_address(sioc, NULL); + g_assert(addr != NULL); + uri = socket_uri(addr); + qemu_set_info_str(&s->nc, uri); + g_free(uri); + + ret = qemu_socket_try_set_nonblock(sioc->fd); + if (addr->type == SOCKET_ADDRESS_TYPE_FD && ret < 0) { + qemu_set_info_str(&s->nc, "can't use file descriptor %s (errno %d)", + addr->u.fd.str, -ret); + qapi_free_SocketAddress(addr); + goto error; + } + g_assert(ret == 0); + net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); - qemu_set_fd_handler(s->listen_fd, net_stream_accept, NULL, s); - return 0; + /* Disable Nagle algorithm on TCP sockets to reduce latency */ + qio_channel_set_delay(s->ioc, false); + + s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send, + s, NULL); + s->nc.link_down = false; + qapi_free_SocketAddress(addr); + + return; +error: + 
object_unref(OBJECT(s->ioc)); + s->ioc = NULL; } static int net_stream_client_init(NetClientState *peer, @@ -391,118 +350,19 @@ static int net_stream_client_init(NetClientState *peer, Error **errp) { NetStreamState *s; - struct sockaddr_in saddr_in; - struct sockaddr_un saddr_un; - int fd, connected, ret; + NetClientState *nc; + QIOChannelSocket *sioc = qio_channel_socket_new(); - switch (addr->type) { - case SOCKET_ADDRESS_TYPE_INET: - if (convert_host_port(&saddr_in, addr->u.inet.host, addr->u.inet.port, - errp) < 0) { - return -1; - } + nc = qemu_new_net_client(&net_stream_info, peer, model, name); + s = DO_UPCAST(NetStreamState, nc, nc); - fd = qemu_socket(PF_INET, SOCK_STREAM, 0); - if (fd < 0) { - error_setg_errno(errp, errno, "can't create stream socket"); - return -1; - } - qemu_socket_set_nonblock(fd); + s->ioc = QIO_CHANNEL(sioc); + s->nc.link_down = true; - connected = 0; - for (;;) { - ret = connect(fd, (struct sockaddr *)&saddr_in, sizeof(saddr_in)); - if (ret < 0) { - if (errno == EINTR || errno == EWOULDBLOCK) { - /* continue */ - } else if (errno == EINPROGRESS || - errno == EALREADY) { - break; - } else { - error_setg_errno(errp, errno, "can't connect socket"); - closesocket(fd); - return -1; - } - } else { - connected = 1; - break; - } - } - break; - case SOCKET_ADDRESS_TYPE_UNIX: - saddr_un.sun_family = PF_UNIX; - ret = snprintf(saddr_un.sun_path, sizeof(saddr_un.sun_path), "%s", - addr->u.q_unix.path); - if (ret < 0 || ret >= sizeof(saddr_un.sun_path)) { - error_setg(errp, "UNIX socket path '%s' is too long", - addr->u.q_unix.path); - error_append_hint(errp, "Path must be less than %zu bytes\n", - sizeof(saddr_un.sun_path)); - return -1; - } + qio_channel_socket_connect_async(sioc, addr, + net_stream_client_connected, s, + NULL, NULL); - fd = qemu_socket(PF_UNIX, SOCK_STREAM, 0); - if (fd < 0) { - error_setg_errno(errp, errno, "can't create stream socket"); - return -1; - } - qemu_socket_set_nonblock(fd); - - connected = 0; - for (;;) { - ret = 
connect(fd, (struct sockaddr *)&saddr_un, sizeof(saddr_un)); - if (ret < 0) { - if (errno == EINTR || errno == EWOULDBLOCK) { - /* continue */ - } else if (errno == EAGAIN || - errno == EALREADY) { - break; - } else { - error_setg_errno(errp, errno, "can't connect socket"); - closesocket(fd); - return -1; - } - } else { - connected = 1; - break; - } - } - break; - case SOCKET_ADDRESS_TYPE_FD: - fd = monitor_fd_param(monitor_cur(), addr->u.fd.str, errp); - if (fd == -1) { - return -1; - } - ret = qemu_socket_try_set_nonblock(fd); - if (ret < 0) { - error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", - name, fd); - return -1; - } - connected = 1; - break; - default: - error_setg(errp, "only support inet, unix or fd type"); - return -1; - } - - s = net_stream_fd_init(peer, model, name, fd, connected); - - switch (addr->type) { - case SOCKET_ADDRESS_TYPE_INET: - qemu_set_info_str(&s->nc, "connect to %s:%d", - inet_ntoa(saddr_in.sin_addr), - ntohs(saddr_in.sin_port)); - break; - case SOCKET_ADDRESS_TYPE_UNIX: - qemu_set_info_str(&s->nc, " connect to %s", saddr_un.sun_path); - break; - case SOCKET_ADDRESS_TYPE_FD: - qemu_set_info_str(&s->nc, "connect to fd %d", fd); - break; - default: - g_assert_not_reached(); - } return 0; } diff --git a/qemu-options.hx b/qemu-options.hx index e76142bfb6..ceee0ddc25 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2772,8 +2772,8 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" " configure a network backend to connect to another network\n" " using an UDP tunnel\n" - "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port\n" - "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path\n" + "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n" + "-netdev 
stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n" "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n" " configure a network backend to connect to another network\n" " using a socket connection in stream mode.\n" From e506fee8b1e092f6ac6f9459bf6a35b807644ad2 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 21 Oct 2022 11:09:22 +0200 Subject: [PATCH 26/26] net: stream: add QAPI events to report connection state The netdev reports NETDEV_STREAM_CONNECTED event when the backend is connected, and NETDEV_STREAM_DISCONNECTED when it is disconnected. The NETDEV_STREAM_CONNECTED event includes the destination address. This allows a system manager like libvirt to detect when the server fails. For instance with passt: { 'execute': 'qmp_capabilities' } { "return": { } } { "timestamp": { "seconds": 1666341395, "microseconds": 505347 }, "event": "NETDEV_STREAM_CONNECTED", "data": { "netdev-id": "netdev0", "addr": { "path": "/tmp/passt_1.socket", "type": "unix" } } } [killing passt here] { "timestamp": { "seconds": 1666341430, "microseconds": 968694 }, "event": "NETDEV_STREAM_DISCONNECTED", "data": { "netdev-id": "netdev0" } } Signed-off-by: Laurent Vivier Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang --- net/stream.c | 5 +++++ qapi/net.json | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/net/stream.c b/net/stream.c index 54c67e14d2..53b7040cc4 100644 --- a/net/stream.c +++ b/net/stream.c @@ -38,6 +38,7 @@ #include "io/channel.h" #include "io/channel-socket.h" #include "io/net-listener.h" +#include "qapi/qapi-events-net.h" typedef struct NetStreamState { NetClientState nc; @@ -168,6 +169,8 @@ static gboolean net_stream_send(QIOChannel *ioc, s->nc.link_down = true; qemu_set_info_str(&s->nc, ""); + qapi_event_send_netdev_stream_disconnected(s->nc.name); + return G_SOURCE_REMOVE; } buf = buf1; @@ -244,6 +247,7 @@ static void net_stream_listen(QIONetListener *listener, uri = socket_uri(addr); qemu_set_info_str(&s->nc, uri); g_free(uri); + qapi_event_send_netdev_stream_connected(s->nc.name, addr); qapi_free_SocketAddress(addr); } @@ -335,6 +339,7 @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque) s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send, s, NULL); s->nc.link_down = false; + qapi_event_send_netdev_stream_connected(s->nc.name, addr); qapi_free_SocketAddress(addr); return; diff --git a/qapi/net.json b/qapi/net.json index cbc8d77c97..522ac582ed 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -898,3 +898,52 @@ ## { 'event': 'FAILOVER_NEGOTIATED', 'data': {'device-id': 'str'} } + +## +# @NETDEV_STREAM_CONNECTED: +# +# Emitted when the netdev stream backend is connected +# +# @netdev-id: QEMU netdev id that is connected +# @addr: The destination address +# +# Since: 7.2 +# +# Example: +# +# <- { "event": "NETDEV_STREAM_CONNECTED", +# "data": { "netdev-id": "netdev0", +# "addr": { "port": "47666", "ipv6": true, +# "host": "::1", "type": "inet" } }, +# "timestamp": { "seconds": 1666269863, "microseconds": 311222 } } +# +# or +# +# <- { "event": "NETDEV_STREAM_CONNECTED", +# "data": { "netdev-id": "netdev0", +# "addr": { "path": 
"/tmp/qemu0", "type": "unix" } }, +# "timestamp": { "seconds": 1666269706, "microseconds": 413651 } } +# +## +{ 'event': 'NETDEV_STREAM_CONNECTED', + 'data': { 'netdev-id': 'str', + 'addr': 'SocketAddress' } } + +## +# @NETDEV_STREAM_DISCONNECTED: +# +# Emitted when the netdev stream backend is disconnected +# +# @netdev-id: QEMU netdev id that is disconnected +# +# Since: 7.2 +# +# Example: +# +# <- { "event": "NETDEV_STREAM_DISCONNECTED", +# "data": {"netdev-id": "netdev0"}, +# "timestamp": {"seconds": 1663330937, "microseconds": 526695} } +# +## +{ 'event': 'NETDEV_STREAM_DISCONNECTED', + 'data': { 'netdev-id': 'str' } }