diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6d456e579314..a75c66c8679d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12570,7 +12570,8 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, if (need_reset && prog) for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->xdp_rings[i]->xsk_umem) - (void)i40e_xsk_async_xmit(vsi->netdev, i); + (void)i40e_xsk_wakeup(vsi->netdev, i, + XDP_WAKEUP_RX); return 0; } @@ -12892,7 +12893,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_bridge_setlink = i40e_ndo_bridge_setlink, .ndo_bpf = i40e_xdp, .ndo_xdp_xmit = i40e_xdp_xmit, - .ndo_xsk_async_xmit = i40e_xsk_async_xmit, + .ndo_xsk_wakeup = i40e_xsk_wakeup, .ndo_dfwd_add_station = i40e_fwd_add, .ndo_dfwd_del_station = i40e_fwd_del, }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 32bad014d76c..42c90126b6c4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -116,7 +116,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem, return err; /* Kick start the NAPI context so that receiving will start */ - err = i40e_xsk_async_xmit(vsi->netdev, qid); + err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX); if (err) return err; } @@ -626,6 +626,15 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) i40e_finalize_xdp_rx(rx_ring, xdp_xmit); i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); + + if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) { + if (failure || rx_ring->next_to_clean == rx_ring->next_to_use) + xsk_set_rx_need_wakeup(rx_ring->xsk_umem); + else + xsk_clear_rx_need_wakeup(rx_ring->xsk_umem); + + return (int)total_rx_packets; + } return failure ? budget : (int)total_rx_packets; } @@ -681,6 +690,8 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) i40e_xdp_ring_update_tail(xdp_ring); xsk_umem_consume_tx_done(xdp_ring->xsk_umem); + if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem)) + xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem); } return !!budget && work_done; @@ -759,19 +770,27 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, i40e_update_tx_stats(tx_ring, completed_frames, total_bytes); out_xmit: + if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) { + if (tx_ring->next_to_clean == tx_ring->next_to_use) + xsk_set_tx_need_wakeup(tx_ring->xsk_umem); + else + xsk_clear_tx_need_wakeup(tx_ring->xsk_umem); + } + xmit_done = i40e_xmit_zc(tx_ring, budget); return work_done && xmit_done; } /** - * i40e_xsk_async_xmit - Implements the ndo_xsk_async_xmit + * i40e_xsk_wakeup - Implements the ndo_xsk_wakeup * @dev: the netdevice * @queue_id: queue id to wake up + * @flags: ignored in our case since we have Rx and Tx in the same NAPI. * * Returns <0 for errors, 0 otherwise. **/ -int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id) +int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h index 8cc0a2e7d9a2..9ed59c14eb55 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h @@ -18,6 +18,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget); bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring, int napi_budget); -int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id); +int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); #endif /* _I40E_XSK_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index dc7b128c780e..05729b448612 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10263,7 +10263,8 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (need_reset && prog) for (i = 0; i < adapter->num_rx_queues; i++) if (adapter->xdp_ring[i]->xsk_umem) - (void)ixgbe_xsk_async_xmit(adapter->netdev, i); + (void)ixgbe_xsk_wakeup(adapter->netdev, i, + XDP_WAKEUP_RX); return 0; } @@ -10382,7 +10383,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_features_check = ixgbe_features_check, .ndo_bpf = ixgbe_xdp, .ndo_xdp_xmit = ixgbe_xdp_xmit, - .ndo_xsk_async_xmit = ixgbe_xsk_async_xmit, + .ndo_xsk_wakeup = ixgbe_xsk_wakeup, }; static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h index d93a690aff74..6d01700b46bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h @@ -42,7 +42,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring); bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *tx_ring, int napi_budget); -int ixgbe_xsk_async_xmit(struct net_device *dev, u32 queue_id); +int ixgbe_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring); #endif /* #define _IXGBE_TXRX_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 6b609553329f..9a28d98a1484 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -100,7 +100,7 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, ixgbe_txrx_ring_enable(adapter, qid); /* Kick start the NAPI context so that receiving will start */ - err = ixgbe_xsk_async_xmit(adapter->netdev, qid); + err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX); if (err) return err; } @@ -547,6 +547,14 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, q_vector->rx.total_packets += total_rx_packets; q_vector->rx.total_bytes += total_rx_bytes; + if (xsk_umem_uses_need_wakeup(rx_ring->xsk_umem)) { + if (failure || rx_ring->next_to_clean == rx_ring->next_to_use) + xsk_set_rx_need_wakeup(rx_ring->xsk_umem); + else + xsk_clear_rx_need_wakeup(rx_ring->xsk_umem); + + return (int)total_rx_packets; + } return failure ? budget : (int)total_rx_packets; } @@ -615,6 +623,8 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) if (tx_desc) { ixgbe_xdp_ring_update_tail(xdp_ring); xsk_umem_consume_tx_done(xdp_ring->xsk_umem); + if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem)) + xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem); } return !!budget && work_done; @@ -688,11 +698,19 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, if (xsk_frames) xsk_umem_complete_tx(umem, xsk_frames); + if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) { + if (tx_ring->next_to_clean == tx_ring->next_to_use) + xsk_set_tx_need_wakeup(tx_ring->xsk_umem); + else + xsk_clear_tx_need_wakeup(tx_ring->xsk_umem); + } + xmit_done = ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit); + return budget > 0 && xmit_done; } -int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid) +int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_ring *ring; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h index 307b923a1361..cab0e93497ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -5,6 +5,7 @@ #define __MLX5_EN_XSK_RX_H__ #include "en.h" +#include /* RX data path */ @@ -24,4 +25,17 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); +static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err) +{ + if (!xsk_umem_uses_need_wakeup(rq->umem)) + return alloc_err; + + if (unlikely(alloc_err)) + xsk_set_rx_need_wakeup(rq->umem); + else + xsk_clear_rx_need_wakeup(rq->umem); + + return false; +} + #endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index 35e188cf4ea4..9704634a87aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -7,7 +7,7 @@ #include "en/params.h" #include -int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_params *params = &priv->channels.params; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h index 7add18bf78d8..79b487d89757 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -5,11 +5,23 @@ #define __MLX5_EN_XSK_TX_H__ #include "en.h" +#include /* TX data path */ -int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid); +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); +static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq) +{ + if (!xsk_umem_uses_need_wakeup(sq->umem)) + return; + + if (sq->pc != sq->cc) + xsk_clear_tx_need_wakeup(sq->umem); + else + xsk_set_tx_need_wakeup(sq->umem); +} + #endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9a2fcef6e7f0..9df7e5c3c4cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4540,7 +4540,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_tx_timeout = mlx5e_tx_timeout, .ndo_bpf = mlx5e_xdp, .ndo_xdp_xmit = mlx5e_xdp_xmit, - .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit, + .ndo_xsk_wakeup = mlx5e_xsk_wakeup, #ifdef CONFIG_MLX5_EN_ARFS .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 60570b442fff..fae0694d1cf8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -692,8 +692,11 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk; rq->mpwqe.actual_wq_head = head; - /* If XSK Fill Ring doesn't have enough frames, busy poll by - * rescheduling the NAPI poll. + /* If XSK Fill Ring doesn't have enough frames, report the error, so + * that one of the actions can be performed: + * 1. If need_wakeup is used, signal that the application has to kick + * the driver when it refills the Fill Ring. + * 2. Otherwise, busy poll by rescheduling the NAPI poll. */ if (unlikely(alloc_err == -ENOMEM && rq->umem)) return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 49b06b256c92..257a7c9f7a14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -33,6 +33,7 @@ #include #include "en.h" #include "en/xdp.h" +#include "en/xsk/rx.h" #include "en/xsk/tx.h" static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) @@ -81,6 +82,29 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl); } +static bool mlx5e_napi_xsk_post(struct mlx5e_xdpsq *xsksq, struct mlx5e_rq *xskrq) +{ + bool busy_xsk = false, xsk_rx_alloc_err; + + /* Handle the race between the application querying need_wakeup and the + * driver setting it: + * 1. Update need_wakeup both before and after the TX. If it goes to + * "yes", it can only happen with the first update. + * 2. If the application queried need_wakeup before we set it, the + * packets will be transmitted anyway, even w/o a wakeup. + * 3. Give a chance to clear need_wakeup after new packets were queued + * for TX. + */ + mlx5e_xsk_update_tx_wakeup(xsksq); + busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); + mlx5e_xsk_update_tx_wakeup(xsksq); + + xsk_rx_alloc_err = xskrq->post_wqes(xskrq); + busy_xsk |= mlx5e_xsk_update_rx_wakeup(xskrq, xsk_rx_alloc_err); + + return busy_xsk; +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -122,8 +146,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) if (xsk_open) { mlx5e_poll_ico_cq(&c->xskicosq.cq); busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); - busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); - busy_xsk |= xskrq->post_wqes(xskrq); + busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq); } busy |= busy_xsk; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 55ac223553f8..0ef0570386a2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -901,6 +901,10 @@ struct netdev_bpf { }; }; +/* Flags for ndo_xsk_wakeup. */ +#define XDP_WAKEUP_RX (1 << 0) +#define XDP_WAKEUP_TX (1 << 1) + #ifdef CONFIG_XFRM_OFFLOAD struct xfrmdev_ops { int (*xdo_dev_state_add) (struct xfrm_state *x); @@ -1227,6 +1231,12 @@ struct tlsdev_ops; * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. + * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); + * This function is used to wake up the softirq, ksoftirqd or kthread + * responsible for sending and/or receiving packets on a specific + * queue id bound to an AF_XDP socket. The flags field specifies if + * only RX, only Tx, or both should be woken up using the flags + * XDP_WAKEUP_RX and XDP_WAKEUP_TX. * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev); * Get devlink port instance associated with a given netdev. * Called with a reference on the netdevice and devlink locks only, @@ -1426,8 +1436,8 @@ struct net_device_ops { int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, u32 flags); - int (*ndo_xsk_async_xmit)(struct net_device *dev, - u32 queue_id); + int (*ndo_xsk_wakeup)(struct net_device *dev, + u32 queue_id, u32 flags); struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); }; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 69796d264f06..6aebea2b18cb 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -27,6 +27,9 @@ struct xdp_umem_fq_reuse { u64 handles[]; }; +/* Flags for the umem flags field. */ +#define XDP_UMEM_USES_NEED_WAKEUP (1 << 0) + struct xdp_umem { struct xsk_queue *fq; struct xsk_queue *cq; @@ -41,10 +44,12 @@ struct xdp_umem { struct work_struct work; struct page **pgs; u32 npgs; + u16 queue_id; + u8 need_wakeup; + u8 flags; int id; struct net_device *dev; struct xdp_umem_fq_reuse *fq_reuse; - u16 queue_id; bool zc; spinlock_t xsk_list_lock; struct list_head xsk_list; @@ -95,6 +100,11 @@ struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, struct xdp_umem_fq_reuse *newq); void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq); struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id); +void xsk_set_rx_need_wakeup(struct xdp_umem *umem); +void xsk_set_tx_need_wakeup(struct xdp_umem *umem); +void xsk_clear_rx_need_wakeup(struct xdp_umem *umem); +void xsk_clear_tx_need_wakeup(struct xdp_umem *umem); +bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem); static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) { @@ -241,6 +251,27 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr) { } +static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem) +{ +} + +static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem) +{ + return false; +} + #endif /* CONFIG_XDP_SOCKETS */ #endif /* _LINUX_XDP_SOCK_H */ diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index faaa5ca2a117..62b80d57b72a 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -16,6 +16,15 @@ #define XDP_SHARED_UMEM (1 << 0) #define XDP_COPY (1 << 1) /* Force copy-mode */ #define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */ +/* If this option is set, the driver might go sleep and in that case + * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be + * set. If it is set, the application need to explicitly wake up the + * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are + * running the driver and the application on the same core, you should + * use this option so that the kernel will yield to the user space + * application. + */ +#define XDP_USE_NEED_WAKEUP (1 << 3) struct sockaddr_xdp { __u16 sxdp_family; @@ -25,10 +34,14 @@ struct sockaddr_xdp { __u32 sxdp_shared_umem_fd; }; +/* XDP_RING flags */ +#define XDP_RING_NEED_WAKEUP (1 << 0) + struct xdp_ring_offset { __u64 producer; __u64 consumer; __u64 desc; + __u64 flags; }; struct xdp_mmap_offsets { diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index a0607969f8c0..cda6feb1edb0 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -106,14 +106,22 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, umem->dev = dev; umem->queue_id = queue_id; + if (flags & XDP_USE_NEED_WAKEUP) { + umem->flags |= XDP_UMEM_USES_NEED_WAKEUP; + /* Tx needs to be explicitly woken up the first time. + * Also for supporting drivers that do not implement this + * feature. They will always have to call sendto(). + */ + xsk_set_tx_need_wakeup(umem); + } + dev_hold(dev); if (force_copy) /* For copy-mode, we are done. */ return 0; - if (!dev->netdev_ops->ndo_bpf || - !dev->netdev_ops->ndo_xsk_async_xmit) { + if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) { err = -EOPNOTSUPP; goto err_unreg_umem; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 59b57d708697..9f900b56b15b 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -55,6 +55,66 @@ void xsk_umem_discard_addr(struct xdp_umem *umem) } EXPORT_SYMBOL(xsk_umem_discard_addr); +void xsk_set_rx_need_wakeup(struct xdp_umem *umem) +{ + if (umem->need_wakeup & XDP_WAKEUP_RX) + return; + + umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP; + umem->need_wakeup |= XDP_WAKEUP_RX; +} +EXPORT_SYMBOL(xsk_set_rx_need_wakeup); + +void xsk_set_tx_need_wakeup(struct xdp_umem *umem) +{ + struct xdp_sock *xs; + + if (umem->need_wakeup & XDP_WAKEUP_TX) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(xs, &umem->xsk_list, list) { + xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP; + } + rcu_read_unlock(); + + umem->need_wakeup |= XDP_WAKEUP_TX; +} +EXPORT_SYMBOL(xsk_set_tx_need_wakeup); + +void xsk_clear_rx_need_wakeup(struct xdp_umem *umem) +{ + if (!(umem->need_wakeup & XDP_WAKEUP_RX)) + return; + + umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP; + umem->need_wakeup &= ~XDP_WAKEUP_RX; +} +EXPORT_SYMBOL(xsk_clear_rx_need_wakeup); + +void xsk_clear_tx_need_wakeup(struct xdp_umem *umem) +{ + struct xdp_sock *xs; + + if (!(umem->need_wakeup & XDP_WAKEUP_TX)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(xs, &umem->xsk_list, list) { + xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP; + } + rcu_read_unlock(); + + umem->need_wakeup &= ~XDP_WAKEUP_TX; +} +EXPORT_SYMBOL(xsk_clear_tx_need_wakeup); + +bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem) +{ + return umem->flags & XDP_UMEM_USES_NEED_WAKEUP; +} +EXPORT_SYMBOL(xsk_umem_uses_need_wakeup); + static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { void *to_buf, *from_buf; @@ -212,7 +272,8 @@ static int xsk_zc_xmit(struct sock *sk) struct xdp_sock *xs = xdp_sk(sk); struct net_device *dev = xs->dev; - return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id); + return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, + XDP_WAKEUP_TX); } static void xsk_destruct_skb(struct sk_buff *skb) @@ -319,6 +380,12 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock, unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); + struct net_device *dev = xs->dev; + struct xdp_umem *umem = xs->umem; + + if (umem->need_wakeup) + dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, + umem->need_wakeup); if (xs->rx && !xskq_empty_desc(xs->rx)) mask |= POLLIN | POLLRDNORM; @@ -427,7 +494,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) return -EINVAL; flags = sxdp->sxdp_flags; - if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY)) + if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY | + XDP_USE_NEED_WAKEUP)) return -EINVAL; rtnl_lock(); @@ -454,7 +522,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct xdp_sock *umem_xs; struct socket *sock; - if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) { + if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) || + (flags & XDP_USE_NEED_WAKEUP)) { /* Cannot specify flags for shared sockets. */ err = -EINVAL; goto out_unlock; @@ -549,6 +618,9 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, } q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx; err = xsk_init_queue(entries, q, false); + if (!err && optname == XDP_TX_RING) + /* Tx needs to be explicitly woken up the first time */ + xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP; mutex_unlock(&xs->mutex); return err; } @@ -610,6 +682,20 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, return -ENOPROTOOPT; } +static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring) +{ + ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); + ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); + ring->desc = offsetof(struct xdp_rxtx_ring, desc); +} + +static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring) +{ + ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer); + ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); + ring->desc = offsetof(struct xdp_umem_ring, desc); +} + static int xsk_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -649,26 +735,49 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, case XDP_MMAP_OFFSETS: { struct xdp_mmap_offsets off; + struct xdp_mmap_offsets_v1 off_v1; + bool flags_supported = true; + void *to_copy; - if (len < sizeof(off)) + if (len < sizeof(off_v1)) return -EINVAL; + else if (len < sizeof(off)) + flags_supported = false; - off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); - off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); - off.rx.desc = offsetof(struct xdp_rxtx_ring, desc); - off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); - off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); - off.tx.desc = offsetof(struct xdp_rxtx_ring, desc); + if (flags_supported) { + /* xdp_ring_offset is identical to xdp_ring_offset_v1 + * except for the flags field added to the end. + */ + xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *) + &off.rx); + xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *) + &off.tx); + xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *) + &off.fr); + xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *) + &off.cr); + off.rx.flags = offsetof(struct xdp_rxtx_ring, + ptrs.flags); + off.tx.flags = offsetof(struct xdp_rxtx_ring, + ptrs.flags); + off.fr.flags = offsetof(struct xdp_umem_ring, + ptrs.flags); + off.cr.flags = offsetof(struct xdp_umem_ring, + ptrs.flags); - off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer); - off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); - off.fr.desc = offsetof(struct xdp_umem_ring, desc); - off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer); - off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); - off.cr.desc = offsetof(struct xdp_umem_ring, desc); + len = sizeof(off); + to_copy = &off; + } else { + xsk_enter_rxtx_offsets(&off_v1.rx); + xsk_enter_rxtx_offsets(&off_v1.tx); + xsk_enter_umem_offsets(&off_v1.fr); + xsk_enter_umem_offsets(&off_v1.cr); - len = sizeof(off); - if (copy_to_user(optval, &off, len)) + len = sizeof(off_v1); + to_copy = &off_v1; + } + + if (copy_to_user(optval, to_copy, len)) return -EFAULT; if (put_user(len, optlen)) return -EFAULT; diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h index ba8120610426..4cfd106bdb53 100644 --- a/net/xdp/xsk.h +++ b/net/xdp/xsk.h @@ -4,6 +4,19 @@ #ifndef XSK_H_ #define XSK_H_ +struct xdp_ring_offset_v1 { + __u64 producer; + __u64 consumer; + __u64 desc; +}; + +struct xdp_mmap_offsets_v1 { + struct xdp_ring_offset_v1 rx; + struct xdp_ring_offset_v1 tx; + struct xdp_ring_offset_v1 fr; + struct xdp_ring_offset_v1 cr; +}; + static inline struct xdp_sock *xdp_sk(struct sock *sk) { return (struct xdp_sock *)sk; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 909c5168ed0f..dd9e985c2461 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -16,6 +16,7 @@ struct xdp_ring { u32 producer ____cacheline_aligned_in_smp; u32 consumer ____cacheline_aligned_in_smp; + u32 flags; }; /* Used for the RX and TX queues for packets */ diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 93eaaf7239b2..da84c760c094 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -67,8 +67,10 @@ static int opt_ifindex; static int opt_queue; static int opt_poll; static int opt_interval = 1; -static u32 opt_xdp_bind_flags; +static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; +static int opt_timeout = 1000; +static bool opt_need_wakeup = true; static __u32 prog_id; struct xsk_umem_info { @@ -352,6 +354,7 @@ static struct option long_options[] = { {"zero-copy", no_argument, 0, 'z'}, {"copy", no_argument, 0, 'c'}, {"frame-size", required_argument, 0, 'f'}, + {"no-need-wakeup", no_argument, 0, 'm'}, {0, 0, 0, 0} }; @@ -372,6 +375,7 @@ static void usage(const char *prog) " -z, --zero-copy Force zero-copy mode.\n" " -c, --copy Force copy mode.\n" " -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n" + " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE); exit(EXIT_FAILURE); @@ -384,8 +388,9 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options, - &option_index); + + c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:m", + long_options, &option_index); if (c == -1) break; @@ -429,6 +434,9 @@ static void parse_command_line(int argc, char **argv) break; case 'f': opt_xsk_frame_size = atoi(optarg); + case 'm': + opt_need_wakeup = false; + opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP; break; default: usage(basename(argv[0])); @@ -459,7 +467,8 @@ static void kick_tx(struct xsk_socket_info *xsk) exit_with_error(errno); } -static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk) +static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, + struct pollfd *fds) { u32 idx_cq = 0, idx_fq = 0; unsigned int rcvd; @@ -468,7 +477,9 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk) if (!xsk->outstanding_tx) return; - kick_tx(xsk); + if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) + kick_tx(xsk); + ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE : xsk->outstanding_tx; @@ -482,6 +493,8 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk) while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) + ret = poll(fds, num_socks, opt_timeout); ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); } @@ -505,7 +518,8 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk) if (!xsk->outstanding_tx) return; - kick_tx(xsk); + if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) + kick_tx(xsk); rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx); if (rcvd > 0) { @@ -515,20 +529,25 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk) } } -static void rx_drop(struct xsk_socket_info *xsk) +static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) { unsigned int rcvd, i; u32 idx_rx = 0, idx_fq = 0; int ret; rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); - if (!rcvd) + if (!rcvd) { + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) + ret = poll(fds, num_socks, opt_timeout); return; + } ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) + ret = poll(fds, num_socks, opt_timeout); ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); } @@ -549,42 +568,65 @@ static void rx_drop(struct xsk_socket_info *xsk) static void rx_drop_all(void) { struct pollfd fds[MAX_SOCKS + 1]; - int i, ret, timeout, nfds = 1; + int i, ret; memset(fds, 0, sizeof(fds)); for (i = 0; i < num_socks; i++) { fds[i].fd = xsk_socket__fd(xsks[i]->xsk); fds[i].events = POLLIN; - timeout = 1000; /* 1sn */ } for (;;) { if (opt_poll) { - ret = poll(fds, nfds, timeout); + ret = poll(fds, num_socks, opt_timeout); if (ret <= 0) continue; } for (i = 0; i < num_socks; i++) - rx_drop(xsks[i]); + rx_drop(xsks[i], fds); } } -static void tx_only(struct xsk_socket_info *xsk) +static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb) { - int timeout, ret, nfds = 1; - struct pollfd fds[nfds + 1]; - u32 idx, frame_nb = 0; + u32 idx; + + if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) { + unsigned int i; + + for (i = 0; i < BATCH_SIZE; i++) { + xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr = + (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; + xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len = + sizeof(pkt_data) - 1; + } + + xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE); + xsk->outstanding_tx += BATCH_SIZE; + frame_nb += BATCH_SIZE; + frame_nb %= NUM_FRAMES; + } + + complete_tx_only(xsk); +} + +static void tx_only_all(void) +{ + struct pollfd fds[MAX_SOCKS]; + u32 frame_nb[MAX_SOCKS] = {}; + int i, ret; memset(fds, 0, sizeof(fds)); - fds[0].fd = xsk_socket__fd(xsk->xsk); - fds[0].events = POLLOUT; - timeout = 1000; /* 1sn */ + for (i = 0; i < num_socks; i++) { + fds[0].fd = xsk_socket__fd(xsks[i]->xsk); + fds[0].events = POLLOUT; + } for (;;) { if (opt_poll) { - ret = poll(fds, nfds, timeout); + ret = poll(fds, num_socks, opt_timeout); if (ret <= 0) continue; @@ -592,69 +634,75 @@ static void tx_only(struct xsk_socket_info *xsk) continue; } - if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == - BATCH_SIZE) { - unsigned int i; - - for (i = 0; i < BATCH_SIZE; i++) { - xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr - = (frame_nb + i) * opt_xsk_frame_size; - xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len = - sizeof(pkt_data) - 1; - } - - xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE); - xsk->outstanding_tx += BATCH_SIZE; - frame_nb += BATCH_SIZE; - frame_nb %= NUM_FRAMES; - } - - complete_tx_only(xsk); + for (i = 0; i < num_socks; i++) + tx_only(xsks[i], frame_nb[i]); } } -static void l2fwd(struct xsk_socket_info *xsk) +static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) { - for (;;) { - unsigned int rcvd, i; - u32 idx_rx = 0, idx_tx = 0; - int ret; + unsigned int rcvd, i; + u32 idx_rx = 0, idx_tx = 0; + int ret; - for (;;) { - complete_tx_l2fwd(xsk); + complete_tx_l2fwd(xsk, fds); - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, - &idx_rx); - if (rcvd > 0) - break; - } + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + if (!rcvd) { + if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) + ret = poll(fds, num_socks, opt_timeout); + return; + } + ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); + while (ret != rcvd) { + if (ret < 0) + exit_with_error(-ret); + if (xsk_ring_prod__needs_wakeup(&xsk->tx)) + kick_tx(xsk); ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); - while (ret != rcvd) { - if (ret < 0) - exit_with_error(-ret); - ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); + } + + for (i = 0; i < rcvd; i++) { + u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; + u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; + char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); + + swap_mac_addresses(pkt); + + hex_dump(pkt, len, addr); + xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr; + xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len; + } + + xsk_ring_prod__submit(&xsk->tx, rcvd); + xsk_ring_cons__release(&xsk->rx, rcvd); + + xsk->rx_npkts += rcvd; + xsk->outstanding_tx += rcvd; +} + +static void l2fwd_all(void) +{ + struct pollfd fds[MAX_SOCKS]; + int i, ret; + + memset(fds, 0, sizeof(fds)); + + for (i = 0; i < num_socks; i++) { + fds[i].fd = xsk_socket__fd(xsks[i]->xsk); + fds[i].events = POLLOUT | POLLIN; + } + + for (;;) { + if (opt_poll) { + ret = poll(fds, num_socks, opt_timeout); + if (ret <= 0) + continue; } - for (i = 0; i < rcvd; i++) { - u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, - idx_rx)->addr; - u32 len = xsk_ring_cons__rx_desc(&xsk->rx, - idx_rx++)->len; - char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); - - swap_mac_addresses(pkt); - - hex_dump(pkt, len, addr); - xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = addr; - xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len; - } - - xsk_ring_prod__submit(&xsk->tx, rcvd); - xsk_ring_cons__release(&xsk->rx, rcvd); - - xsk->rx_npkts += rcvd; - xsk->outstanding_tx += rcvd; + for (i = 0; i < num_socks; i++) + l2fwd(xsks[i], fds); } } @@ -705,9 +753,9 @@ int main(int argc, char **argv) if (opt_bench == BENCH_RXDROP) rx_drop_all(); else if (opt_bench == BENCH_TXONLY) - tx_only(xsks[0]); + tx_only_all(); else - l2fwd(xsks[0]); + l2fwd_all(); return 0; } diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index faaa5ca2a117..62b80d57b72a 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -16,6 +16,15 @@ #define XDP_SHARED_UMEM (1 << 0) #define XDP_COPY (1 << 1) /* Force copy-mode */ #define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */ +/* If this option is set, the driver might go sleep and in that case + * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be + * set. If it is set, the application need to explicitly wake up the + * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are + * running the driver and the application on the same core, you should + * use this option so that the kernel will yield to the user space + * application. + */ +#define XDP_USE_NEED_WAKEUP (1 << 3) struct sockaddr_xdp { __u16 sxdp_family; @@ -25,10 +34,14 @@ struct sockaddr_xdp { __u32 sxdp_shared_umem_fd; }; +/* XDP_RING flags */ +#define XDP_RING_NEED_WAKEUP (1 << 0) + struct xdp_ring_offset { __u64 producer; __u64 consumer; __u64 desc; + __u64 flags; }; struct xdp_mmap_offsets { diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 680e63066cf3..17e8d79c11a8 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -224,6 +224,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, fill->size = umem->config.fill_size; fill->producer = map + off.fr.producer; fill->consumer = map + off.fr.consumer; + fill->flags = map + off.fr.flags; fill->ring = map + off.fr.desc; fill->cached_cons = umem->config.fill_size; @@ -241,6 +242,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, comp->size = umem->config.comp_size; comp->producer = map + off.cr.producer; comp->consumer = map + off.cr.consumer; + comp->flags = map + off.cr.flags; comp->ring = map + off.cr.desc; *umem_ptr = umem; @@ -564,6 +566,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, rx->size = xsk->config.rx_size; rx->producer = rx_map + off.rx.producer; rx->consumer = rx_map + off.rx.consumer; + rx->flags = rx_map + off.rx.flags; rx->ring = rx_map + off.rx.desc; } xsk->rx = rx; @@ -583,6 +586,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, tx->size = xsk->config.tx_size; tx->producer = tx_map + off.tx.producer; tx->consumer = tx_map + off.tx.consumer; + tx->flags = tx_map + off.tx.flags; tx->ring = tx_map + off.tx.desc; tx->cached_cons = xsk->config.tx_size; } diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 833a6e60d065..aa1d6122b7db 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -32,6 +32,7 @@ struct name { \ __u32 *producer; \ __u32 *consumer; \ void *ring; \ + __u32 *flags; \ } DEFINE_XSK_RING(xsk_ring_prod); @@ -76,6 +77,11 @@ xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx) return &descs[idx & rx->mask]; } +static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r) +{ + return *r->flags & XDP_RING_NEED_WAKEUP; +} + static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) { __u32 free_entries = r->cached_cons - r->cached_prod;