From 9fa9379dcb52a9172c0ee520d8afe7a3dc6288b4 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia
Date: Fri, 30 May 2014 13:40:04 -0300
Subject: [PATCH 1/8] net: mvneta: Use default NAPI weight instead of a custom one

This driver has no need for a custom NAPI weight. Use the default
one, which has the same value.

Signed-off-by: Ezequiel Garcia
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/mvneta.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index b8919fa6ed27..af1ac82561c0 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -219,9 +219,6 @@
 #define MVNETA_RX_COAL_PKTS		32
 #define MVNETA_RX_COAL_USEC		100
 
-/* Napi polling weight */
-#define MVNETA_RX_POLL_WEIGHT		64
-
 /* The two bytes Marvell header. Either contains a special value used
  * by Marvell switches when a specific hardware mode is enabled (not
  * supported by this driver) or is filled automatically by zeroes on
@@ -3025,7 +3022,7 @@ static int mvneta_probe(struct platform_device *pdev)
 	if (dram_target_info)
 		mvneta_conf_mbus_windows(pp, dram_target_info);
 
-	netif_napi_add(dev, &pp->napi, mvneta_poll, MVNETA_RX_POLL_WEIGHT);
+	netif_napi_add(dev, &pp->napi, mvneta_poll, NAPI_POLL_WEIGHT);
 
 	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 	dev->hw_features |= dev->features;
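
For reference, NAPI_POLL_WEIGHT is the stack-provided default (64, defined in <linux/netdevice.h>), so this conversion is behaviour-neutral. A minimal sketch of the registration pattern — my_poll and struct my_priv are illustrative names, not mvneta code:

    #include <linux/netdevice.h>

    /* Hypothetical driver state; mvneta's equivalent is struct mvneta_port */
    struct my_priv {
        struct napi_struct napi;
    };

    /* Poll callback: handle up to `budget` packets, return how many were
     * processed, and complete NAPI when the work is done. */
    static int my_poll(struct napi_struct *napi, int budget)
    {
        int done = 0;

        /* ... receive up to `budget` packets, counting them in `done` ... */
        if (done < budget)
            napi_complete(napi);
        return done;
    }

    /* at probe time: */
    /* netif_napi_add(dev, &priv->napi, my_poll, NAPI_POLL_WEIGHT); */
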
From 8eef5f9723f987d14de409c0a65e94a83841b28a Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia
Date: Fri, 30 May 2014 13:40:05 -0300
Subject: [PATCH 2/8] net: mvneta: Limit the TSO segments and adjust stop/wake thresholds

Currently, small MSS values may require more TSO descriptors than the
default Tx queue size provides. This commit prevents that situation
by capping the number of supported TSO segments at 100 and by
enforcing a minimum Tx queue size. The minimum Tx queue size is
chosen so that at least two worst-case skbs can be accommodated.

In addition, the queue stop and wake threshold values are adjusted
accordingly: the queue is stopped when there is room for only one
worst-case skb, and woken when the used-descriptor count falls to
half the stop threshold.

Signed-off-by: Ezequiel Garcia
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/mvneta.c | 28 +++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index af1ac82561c0..57a2e76835c0 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -251,6 +251,11 @@
 /* Max number of Tx descriptors */
 #define MVNETA_MAX_TXD 532
 
+/* Max number of allowed TCP segments for software TSO */
+#define MVNETA_MAX_TSO_SEGS 100
+
+#define MVNETA_MAX_SKB_DESCS (MVNETA_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
 /* descriptor aligned size */
 #define MVNETA_DESC_ALIGNED_SIZE	32
 
@@ -388,6 +393,8 @@ struct mvneta_tx_queue {
 	 * descriptor ring
 	 */
 	int count;
+	int tx_stop_threshold;
+	int tx_wake_threshold;
 
 	/* Array of transmitted skb */
 	struct sk_buff **tx_skb;
@@ -1309,7 +1316,7 @@ static void mvneta_txq_done(struct mvneta_port *pp,
 	txq->count -= tx_done;
 
 	if (netif_tx_queue_stopped(nq)) {
-		if (txq->size - txq->count >= MAX_SKB_FRAGS + 1)
+		if (txq->count <= txq->tx_wake_threshold)
 			netif_tx_wake_queue(nq);
 	}
 }
@@ -1769,7 +1776,7 @@ out:
 		txq->count += frags;
 		mvneta_txq_pend_desc_add(pp, txq, frags);
 
-		if (txq->size - txq->count < MAX_SKB_FRAGS + 1)
+		if (txq->count >= txq->tx_stop_threshold)
 			netif_tx_stop_queue(nq);
 
 		u64_stats_update_begin(&stats->syncp);
@@ -2208,6 +2215,14 @@ static int mvneta_txq_init(struct mvneta_port *pp,
 {
 	txq->size = pp->tx_ring_size;
 
+	/* A queue must always have room for at least one skb.
+	 * Therefore, stop the queue when the number of free entries
+	 * reaches the maximum number of descriptors per skb.
+	 */
+	txq->tx_stop_threshold = txq->size - MVNETA_MAX_SKB_DESCS;
+	txq->tx_wake_threshold = txq->tx_stop_threshold / 2;
+
 	/* Allocate memory for TX descriptors */
 	txq->descs = dma_alloc_coherent(pp->dev->dev.parent,
 					txq->size * MVNETA_DESC_ALIGNED_SIZE,
@@ -2742,8 +2757,12 @@ static int mvneta_ethtool_set_ringparam(struct net_device *dev,
 		return -EINVAL;
 	pp->rx_ring_size = ring->rx_pending < MVNETA_MAX_RXD ?
		ring->rx_pending : MVNETA_MAX_RXD;
-	pp->tx_ring_size = ring->tx_pending < MVNETA_MAX_TXD ?
-		ring->tx_pending : MVNETA_MAX_TXD;
+
+	pp->tx_ring_size = clamp_t(u16, ring->tx_pending,
+				   MVNETA_MAX_SKB_DESCS * 2, MVNETA_MAX_TXD);
+	if (pp->tx_ring_size != ring->tx_pending)
+		netdev_warn(dev, "TX queue size set to %u (requested %u)\n",
+			    pp->tx_ring_size, ring->tx_pending);
 
 	if (netif_running(dev)) {
 		mvneta_stop(dev);
@@ -3028,6 +3047,7 @@ static int mvneta_probe(struct platform_device *pdev)
 	dev->hw_features |= dev->features;
 	dev->vlan_features |= dev->features;
 	dev->priv_flags |= IFF_UNICAST_FLT;
+	dev->gso_max_segs = MVNETA_MAX_TSO_SEGS;
 
 	err = register_netdev(dev);
 	if (err < 0) {
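
To make the threshold arithmetic concrete, the worst case can be worked through with the driver's own constants. This is a standalone sketch, not driver code; it assumes MAX_SKB_FRAGS is 17 (its usual value with 4 KiB pages), and the factor of 2 per TSO segment covers a header descriptor plus a data descriptor:

    #include <stdio.h>

    #define MAX_SKB_FRAGS        17   /* assumed: 65536 / 4096 + 1 */
    #define MVNETA_MAX_TSO_SEGS  100
    #define MVNETA_MAX_SKB_DESCS (MVNETA_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)

    int main(void)
    {
        int size = 532;                         /* MVNETA_MAX_TXD */
        int stop = size - MVNETA_MAX_SKB_DESCS; /* 532 - 217 = 315 */
        int wake = stop / 2;                    /* 157 */

        /* The queue stops once only 217 entries are free (room for
         * exactly one worst-case skb) and wakes when the in-flight
         * count drops to 157. The enforced minimum ring size,
         * MVNETA_MAX_SKB_DESCS * 2 = 434, guarantees that two
         * worst-case skbs always fit. */
        printf("stop at %d used, wake at %d used\n", stop, wake);
        return 0;
    }
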
From ba7e46ef9d4550f76efb70739728c50bbf94c3a9 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia
Date: Fri, 30 May 2014 13:40:06 -0300
Subject: [PATCH 3/8] net: mvneta: Fix missing DMA region unmap

The Tx descriptor release code currently calls dma_unmap_single() and
dev_kfree_skb_any() only if the descriptor is associated with a
non-NULL skb, which is the case only for the last fragment of a
packet. This is wrong: every descriptor's buffer is DMA mapped and
needs to be unmapped.

Signed-off-by: Ezequiel Garcia
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/mvneta.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 57a2e76835c0..f95e7cadb01b 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1291,11 +1291,10 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
 
 		mvneta_txq_inc_get(txq);
 
-		if (!skb)
-			continue;
-
 		dma_unmap_single(pp->dev->dev.parent, tx_desc->buf_phys_addr,
 				 tx_desc->data_size, DMA_TO_DEVICE);
+		if (!skb)
+			continue;
 		dev_kfree_skb_any(skb);
 	}
 }

From 2e3173a3d38bbb5e29a6a734e7366855c12efbe9 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia
Date: Fri, 30 May 2014 13:40:07 -0300
Subject: [PATCH 4/8] net: mvneta: Avoid unmapping the TSO header buffers

The buffers for the TSO headers belong to a DMA-coherent region that
is allocated at ndo_open() time and released at ndo_stop() time.
Therefore, and contrary to the TSO payload descriptor buffers, the
TSO header buffers don't need to be unmapped. This commit adds a
check that detects a TSO header buffer and explicitly skips the
unmap.

Signed-off-by: Ezequiel Garcia
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/mvneta.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index f95e7cadb01b..45beca17fa50 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -264,6 +264,10 @@
 		      ETH_HLEN + ETH_FCS_LEN,			     \
 		      MVNETA_CPU_D_CACHE_LINE_SIZE)
 
+#define IS_TSO_HEADER(txq, addr) \
+	((addr >= txq->tso_hdrs_phys) && \
+	 (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE))
+
 #define MVNETA_RX_BUF_SIZE(pkt_size)   ((pkt_size) + NET_SKB_PAD)
 
 struct mvneta_pcpu_stats {
@@ -1291,8 +1295,10 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
 
 		mvneta_txq_inc_get(txq);
 
-		dma_unmap_single(pp->dev->dev.parent, tx_desc->buf_phys_addr,
-				 tx_desc->data_size, DMA_TO_DEVICE);
+		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
+			dma_unmap_single(pp->dev->dev.parent,
+					 tx_desc->buf_phys_addr,
+					 tx_desc->data_size, DMA_TO_DEVICE);
 		if (!skb)
 			continue;
 		dev_kfree_skb_any(skb);
@@ -1642,7 +1648,7 @@ err_release:
 	 */
 	for (i = desc_count - 1; i >= 0; i--) {
 		struct mvneta_tx_desc *tx_desc = txq->descs + i;
-		if (!(tx_desc->command & MVNETA_TXD_F_DESC))
+		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
 			dma_unmap_single(pp->dev->dev.parent,
 					 tx_desc->buf_phys_addr,
 					 tx_desc->data_size,
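
The IS_TSO_HEADER() test is a plain address-range check against the per-queue coherent region, which holds one TSO_HEADER_SIZE slot per descriptor. Here is a standalone restatement of the predicate with made-up values — the base address and queue depth below are hypothetical, chosen only to exercise the boundaries:

    #include <stdint.h>
    #include <stdio.h>

    #define TSO_HEADER_SIZE 128

    /* Mirrors the driver macro: addr is a TSO header iff it lies in
     * [tso_hdrs_phys, tso_hdrs_phys + size * TSO_HEADER_SIZE). */
    static int is_tso_header(uint64_t addr, uint64_t tso_hdrs_phys, int size)
    {
        return addr >= tso_hdrs_phys &&
               addr < tso_hdrs_phys + (uint64_t)size * TSO_HEADER_SIZE;
    }

    int main(void)
    {
        uint64_t base = 0x10000000;  /* hypothetical DMA address of the region */
        int size = 532;              /* one header slot per Tx entry */

        printf("%d\n", is_tso_header(base, base, size));              /* 1 */
        printf("%d\n", is_tso_header(base + 531 * 128, base, size));  /* 1 */
        printf("%d\n", is_tso_header(base + 532 * 128, base, size));  /* 0 */
        return 0;
    }

Buffers inside that range came from the coherent allocation and must not be passed to dma_unmap_single(); everything else was streaming-mapped per packet and must be unmapped.
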
From dd11680d0e843a9e5519304a0e89dd3815439102 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia
Date: Fri, 30 May 2014 13:40:08 -0300
Subject: [PATCH 5/8] net: mv643xx_eth: Count dropped packets properly

This commit fixes the dropped-packet accounting: the counter is now
incremented exactly when a packet is discarded, i.e. when the packet
is not processed and the driver returns NETDEV_TX_OK.

Signed-off-by: Ezequiel Garcia
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index c68ff5deba8c..97a60de0cb32 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -967,7 +967,6 @@ static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	nq = netdev_get_tx_queue(dev, queue);
 
 	if (has_tiny_unaligned_frags(skb) && __skb_linearize(skb)) {
-		txq->tx_dropped++;
 		netdev_printk(KERN_DEBUG, dev,
 			      "failed to linearize skb with tiny unaligned fragment\n");
 		return NETDEV_TX_BUSY;
@@ -976,6 +975,7 @@ static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (txq->tx_ring_size - txq->tx_desc_count < MAX_SKB_FRAGS + 1) {
 		if (net_ratelimit())
 			netdev_err(dev, "tx queue full?!\n");
+		txq->tx_dropped++;
 		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
 	}
@@ -997,6 +997,9 @@ static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 			netif_tx_stop_queue(nq);
 	} else if (ret == -EBUSY) {
 		return NETDEV_TX_BUSY;
+	} else {
+		txq->tx_dropped++;
+		dev_kfree_skb_any(skb);
 	}
 
 	return NETDEV_TX_OK;
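
The rule this patch enforces follows from the ndo_start_xmit() contract: a driver that returns NETDEV_TX_OK has consumed the skb, whether it was sent or dropped, while NETDEV_TX_BUSY hands the skb back to the core for requeueing. A hedged sketch of the accounting — submit_skb() and struct my_txq are hypothetical stand-ins, not mv643xx_eth code:

    static netdev_tx_t sketch_xmit(struct sk_buff *skb,
                                   struct my_txq *txq)  /* hypothetical */
    {
        int ret = submit_skb(txq, skb);  /* hypothetical helper */

        if (ret == -EBUSY)
            return NETDEV_TX_BUSY;       /* skb NOT freed: core retries */

        if (ret < 0) {
            txq->tx_dropped++;           /* counted exactly when ... */
            dev_kfree_skb_any(skb);      /* ... the skb is discarded */
        }
        return NETDEV_TX_OK;             /* skb consumed either way */
    }

This is also why the linearization failure above must not bump tx_dropped: it returns NETDEV_TX_BUSY, so the packet is retried, not lost.
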
From ee9e49561bdd6d6b53a67616b5ebbb8d8c6d9c49 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia
Date: Fri, 30 May 2014 13:40:09 -0300
Subject: [PATCH 6/8] net: mv643xx_eth: Limit the TSO segments and adjust stop/wake thresholds

Currently, small MSS values may require more TSO descriptors than the
default Tx queue size provides. This commit prevents that situation
by capping the number of supported TSO segments at 100 and by
enforcing a minimum Tx queue size. The minimum Tx queue size is
chosen so that at least two worst-case skbs can be accommodated.

In addition, the queue stop and wake threshold values are adjusted
accordingly: the queue is stopped when there is room for only one
worst-case skb, and woken when the used-descriptor count falls to
half the stop threshold.

Signed-off-by: Ezequiel Garcia
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 65 ++++++++++++++--------
 1 file changed, 43 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 97a60de0cb32..2cea86d11a6e 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -185,6 +185,10 @@ static char mv643xx_eth_driver_version[] = "1.4";
 
 #define TSO_HEADER_SIZE		128
 
+/* Max number of allowed TCP segments for software TSO */
+#define MV643XX_MAX_TSO_SEGS 100
+#define MV643XX_MAX_SKB_DESCS (MV643XX_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
+
 /*
  * RX/TX descriptors.
  */
@@ -348,6 +352,9 @@ struct tx_queue {
 	int tx_curr_desc;
 	int tx_used_desc;
 
+	int tx_stop_threshold;
+	int tx_wake_threshold;
+
 	char *tso_hdrs;
 	dma_addr_t tso_hdrs_dma;
 
@@ -497,7 +504,7 @@ static void txq_maybe_wake(struct tx_queue *txq)
 
 	if (netif_tx_queue_stopped(nq)) {
 		__netif_tx_lock(nq, smp_processor_id());
-		if (txq->tx_ring_size - txq->tx_desc_count >= MAX_SKB_FRAGS + 1)
+		if (txq->tx_desc_count <= txq->tx_wake_threshold)
 			netif_tx_wake_queue(nq);
 		__netif_tx_unlock(nq);
 	}
@@ -897,7 +904,8 @@ static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb)
 	}
 }
 
-static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb)
+static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb,
+			  struct net_device *dev)
 {
 	struct mv643xx_eth_private *mp = txq_to_mp(txq);
 	int nr_frags = skb_shinfo(skb)->nr_frags;
@@ -910,11 +918,15 @@ static int txq_submit_skb(struct tx_queue *txq, struct sk_buff *skb,
 	cmd_sts = 0;
 	l4i_chk = 0;
 
-	ret = skb_tx_csum(mp, skb, &l4i_chk, &cmd_sts, skb->len);
-	if (ret) {
-		dev_kfree_skb_any(skb);
-		return ret;
+	if (txq->tx_ring_size - txq->tx_desc_count < MAX_SKB_FRAGS + 1) {
+		if (net_ratelimit())
+			netdev_err(dev, "tx queue full?!\n");
+		return -EBUSY;
 	}
+
+	ret = skb_tx_csum(mp, skb, &l4i_chk, &cmd_sts, skb->len);
+	if (ret)
+		return ret;
 
 	cmd_sts |= TX_FIRST_DESC | GEN_CRC | BUFFER_OWNED_BY_DMA;
 	tx_index = txq->tx_curr_desc++;
@@ -972,28 +984,17 @@ static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_BUSY;
 	}
 
-	if (txq->tx_ring_size - txq->tx_desc_count < MAX_SKB_FRAGS + 1) {
-		if (net_ratelimit())
-			netdev_err(dev, "tx queue full?!\n");
-		txq->tx_dropped++;
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
 	length = skb->len;
 
 	if (skb_is_gso(skb))
 		ret = txq_submit_tso(txq, skb, dev);
 	else
-		ret = txq_submit_skb(txq, skb);
+		ret = txq_submit_skb(txq, skb, dev);
 	if (!ret) {
-		int entries_left;
-
 		txq->tx_bytes += length;
 		txq->tx_packets++;
 
-		entries_left = txq->tx_ring_size - txq->tx_desc_count;
-		if (entries_left < MAX_SKB_FRAGS + 1)
+		if (txq->tx_desc_count >= txq->tx_stop_threshold)
 			netif_tx_stop_queue(nq);
 	} else if (ret == -EBUSY) {
 		return NETDEV_TX_BUSY;
@@ -1617,7 +1618,11 @@ mv643xx_eth_set_ringparam(struct net_device *dev, struct ethtool_ringparam *er)
 		return -EINVAL;
 
 	mp->rx_ring_size = er->rx_pending < 4096 ? er->rx_pending : 4096;
-	mp->tx_ring_size = er->tx_pending < 4096 ? er->tx_pending : 4096;
+	mp->tx_ring_size = clamp_t(unsigned int, er->tx_pending,
+				   MV643XX_MAX_SKB_DESCS * 2, 4096);
+	if (mp->tx_ring_size != er->tx_pending)
+		netdev_warn(dev, "TX queue size set to %u (requested %u)\n",
+			    mp->tx_ring_size, er->tx_pending);
 
 	if (netif_running(dev)) {
 		mv643xx_eth_stop(dev);
@@ -1993,6 +1998,13 @@ static int txq_init(struct mv643xx_eth_private *mp, int index)
 
 	txq->tx_ring_size = mp->tx_ring_size;
 
+	/* A queue must always have room for at least one skb.
+	 * Therefore, stop the queue when the number of free entries
+	 * reaches the maximum number of descriptors per skb.
+	 */
+	txq->tx_stop_threshold = txq->tx_ring_size - MV643XX_MAX_SKB_DESCS;
+	txq->tx_wake_threshold = txq->tx_stop_threshold / 2;
+
 	txq->tx_desc_count = 0;
 	txq->tx_curr_desc = 0;
 	txq->tx_used_desc = 0;
@@ -2852,6 +2864,7 @@ static void set_params(struct mv643xx_eth_private *mp,
 		       struct mv643xx_eth_platform_data *pd)
 {
 	struct net_device *dev = mp->dev;
+	unsigned int tx_ring_size;
 
 	if (is_valid_ether_addr(pd->mac_addr))
 		memcpy(dev->dev_addr, pd->mac_addr, ETH_ALEN);
@@ -2866,9 +2879,16 @@ static void set_params(struct mv643xx_eth_private *mp,
 
 	mp->rxq_count = pd->rx_queue_count ? : 1;
 
-	mp->tx_ring_size = DEFAULT_TX_QUEUE_SIZE;
+	tx_ring_size = DEFAULT_TX_QUEUE_SIZE;
 	if (pd->tx_queue_size)
-		mp->tx_ring_size = pd->tx_queue_size;
+		tx_ring_size = pd->tx_queue_size;
+
+	mp->tx_ring_size = clamp_t(unsigned int, tx_ring_size,
+				   MV643XX_MAX_SKB_DESCS * 2, 4096);
+	if (mp->tx_ring_size != tx_ring_size)
+		netdev_warn(dev, "TX queue size set to %u (requested %u)\n",
+			    mp->tx_ring_size, tx_ring_size);
+
 	mp->tx_desc_sram_addr = pd->tx_sram_addr;
 	mp->tx_desc_sram_size = pd->tx_sram_size;
 
@@ -3095,6 +3115,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 	dev->hw_features = dev->features;
 
 	dev->priv_flags |= IFF_UNICAST_FLT;
+	dev->gso_max_segs = MV643XX_MAX_TSO_SEGS;
 
 	SET_NETDEV_DEV(dev, &pdev->dev);
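
The clamp_t() call used on both the ethtool path and the platform-data path simply pins the requested ring size between the two-worst-case-skbs floor and the 4096-entry ceiling. A standalone restatement — the macro below mimics the kernel's clamp_t() for illustration and again assumes MAX_SKB_FRAGS is 17:

    #include <stdio.h>

    /* Userspace re-statement of the kernel's clamp_t(), for illustration */
    #define clamp_t(type, val, lo, hi)                         \
        ((type)(val) < (type)(lo) ? (type)(lo) :               \
         ((type)(val) > (type)(hi) ? (type)(hi) : (type)(val)))

    #define MAX_SKB_FRAGS         17  /* assumed, 4 KiB pages */
    #define MV643XX_MAX_SKB_DESCS (100 * 2 + MAX_SKB_FRAGS)

    int main(void)
    {
        unsigned int lo = MV643XX_MAX_SKB_DESCS * 2;  /* 434 */

        printf("%u\n", clamp_t(unsigned int, 64,   lo, 4096)); /* 434  */
        printf("%u\n", clamp_t(unsigned int, 512,  lo, 4096)); /* 512  */
        printf("%u\n", clamp_t(unsigned int, 9000, lo, 4096)); /* 4096 */
        return 0;
    }
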
From eee492ea35bd6f4bac229744eb0593547fb30826 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia
Date: Fri, 30 May 2014 13:40:10 -0300
Subject: [PATCH 7/8] net: mv643xx_eth: Drop the NETDEV_TX_BUSY return path

After adding proper stop/wake thresholds, the queue can be expected
never to be full, so the NETDEV_TX_BUSY return path can be dropped.
In any case, if the queue cannot accommodate a TSO packet, the packet
is discarded.

Signed-off-by: Ezequiel Garcia
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 2cea86d11a6e..88afbe0435d7 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -996,8 +996,6 @@ static netdev_tx_t mv643xx_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		if (txq->tx_desc_count >= txq->tx_stop_threshold)
 			netif_tx_stop_queue(nq);
-	} else if (ret == -EBUSY) {
-		return NETDEV_TX_BUSY;
 	} else {
 		txq->tx_dropped++;
 		dev_kfree_skb_any(skb);
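
Why the -EBUSY branch is now dead code: xmit runs only while the queue is awake, and the stop threshold keeps at least MV643XX_MAX_SKB_DESCS entries free in every awake state, so even a worst-case skb always fits. A standalone check of that invariant — the 512-entry ring below is a hypothetical example size, not necessarily the driver's default:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        int ring = 512, max_skb_descs = 217;  /* 100 * 2 + 17 */
        int stop = ring - max_skb_descs;
        int used;

        /* every state in which the queue is still awake */
        for (used = 0; used < stop; used++)
            assert(ring - used >= max_skb_descs);  /* worst case fits */
        printf("a worst-case skb fits whenever the queue is awake\n");
        return 0;
    }
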
From b926260c05517478a53cc029035c8eb2d8ca2715 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia
Date: Fri, 30 May 2014 13:40:11 -0300
Subject: [PATCH 8/8] net: mv643xx_eth: Avoid unmapping the TSO header buffers

The buffers for the TSO headers belong to a DMA-coherent region that
is allocated at ndo_open() time and released at ndo_stop() time.
Therefore, and contrary to the TSO payload descriptor buffers, the
TSO header buffers don't need to be unmapped. This commit adds a
check that detects a TSO header buffer and explicitly skips the
unmap.

Signed-off-by: Ezequiel Garcia
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 88afbe0435d7..b151a949f352 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -189,6 +189,9 @@ static char mv643xx_eth_driver_version[] = "1.4";
 #define MV643XX_MAX_TSO_SEGS 100
 #define MV643XX_MAX_SKB_DESCS (MV643XX_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)
 
+#define IS_TSO_HEADER(txq, addr) \
+	((addr >= txq->tso_hdrs_dma) && \
+	 (addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE))
 /*
  * RX/TX descriptors.
  */
@@ -1072,8 +1075,9 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force)
 			mp->dev->stats.tx_errors++;
 		}
 
-		dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr,
-				 desc->byte_cnt, DMA_TO_DEVICE);
+		if (!IS_TSO_HEADER(txq, desc->buf_ptr))
+			dma_unmap_single(mp->dev->dev.parent, desc->buf_ptr,
+					 desc->byte_cnt, DMA_TO_DEVICE);
 		dev_kfree_skb(skb);
 	}