diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt index c67077cbeb80..e1a3d59bbe0f 100644 --- a/Documentation/networking/rds.txt +++ b/Documentation/networking/rds.txt @@ -62,11 +62,10 @@ Socket Interface ================ AF_RDS, PF_RDS, SOL_RDS - These constants haven't been assigned yet, because RDS isn't in - mainline yet. Currently, the kernel module assigns some constant - and publishes it to user space through two sysctl files - /proc/sys/net/rds/pf_rds - /proc/sys/net/rds/sol_rds + AF_RDS and PF_RDS are the domain type to be used with socket(2) + to create RDS sockets. SOL_RDS is the socket level to be used + with setsockopt(2) and getsockopt(2) for RDS-specific socket + options. fd = socket(PF_RDS, SOCK_SEQPACKET, 0); This creates a new, unbound RDS socket. diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 4b0494b9cc7c..1bf1cdce74ac 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -99,6 +99,7 @@ #define BE_NAPI_WEIGHT 64 #define MAX_RX_POST BE_NAPI_WEIGHT /* Frags posted at a time */ #define RX_FRAGS_REFILL_WM (RX_Q_LEN - MAX_RX_POST) +#define MAX_NUM_POST_ERX_DB 255u #define MAX_VFS 30 /* Max VFs supported by BE3 FW */ #define FW_VER_LEN 32 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 5ff7fba9b67c..fb0bc3c3620e 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2122,7 +2122,7 @@ static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed) if (rxo->rx_post_starved) rxo->rx_post_starved = false; do { - notify = min(256u, posted); + notify = min(MAX_NUM_POST_ERX_DB, posted); be_rxq_notify(adapter, rxq->id, notify); posted -= notify; } while (posted); diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 16adbc481772..8fadaa14b9f0 100644 --- a/drivers/net/phy/Kconfig +++ 
b/drivers/net/phy/Kconfig @@ -68,8 +68,8 @@ config SMSC_PHY config BROADCOM_PHY tristate "Drivers for Broadcom PHYs" ---help--- - Currently supports the BCM5411, BCM5421, BCM5461, BCM5464, BCM5481 - and BCM5482 PHYs. + Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, + BCM5481 and BCM5482 PHYs. config BCM63XX_PHY tristate "Drivers for Broadcom 63xx SOCs internal PHY" diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index a52afb26421b..9c71295f2fef 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -548,6 +548,19 @@ static struct phy_driver broadcom_drivers[] = { .ack_interrupt = bcm54xx_ack_interrupt, .config_intr = bcm54xx_config_intr, .driver = { .owner = THIS_MODULE }, +}, { + .phy_id = PHY_ID_BCM54616S, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM54616S", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = bcm54xx_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = bcm54xx_ack_interrupt, + .config_intr = bcm54xx_config_intr, + .driver = { .owner = THIS_MODULE }, }, { .phy_id = PHY_ID_BCM5464, .phy_id_mask = 0xfffffff0, @@ -660,6 +673,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM5411, 0xfffffff0 }, { PHY_ID_BCM5421, 0xfffffff0 }, { PHY_ID_BCM5461, 0xfffffff0 }, + { PHY_ID_BCM54616S, 0xfffffff0 }, { PHY_ID_BCM5464, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 777757ae1973..733f4feb2ef3 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1072,7 +1072,7 @@ static void __handle_set_rx_mode(struct usbnet *dev) * especially now that control transfers can be queued. 
*/ static void -kevent (struct work_struct *work) +usbnet_deferred_kevent (struct work_struct *work) { struct usbnet *dev = container_of(work, struct usbnet, kevent); @@ -1626,7 +1626,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) skb_queue_head_init(&dev->rxq_pause); dev->bh.func = usbnet_bh; dev->bh.data = (unsigned long) dev; - INIT_WORK (&dev->kevent, kevent); + INIT_WORK (&dev->kevent, usbnet_deferred_kevent); init_usb_anchor(&dev->deferred); dev->delay.function = usbnet_bh; dev->delay.data = (unsigned long) dev; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 577c9b071ad9..154116aafd0d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1699,12 +1699,6 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, } } - skb = iptunnel_handle_offloads(skb, udp_sum, type); - if (IS_ERR(skb)) { - err = -EINVAL; - goto err; - } - skb_scrub_packet(skb, xnet); min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len @@ -1724,6 +1718,12 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, goto err; } + skb = iptunnel_handle_offloads(skb, udp_sum, type); + if (IS_ERR(skb)) { + err = -EINVAL; + goto err; + } + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_HF_VNI); vxh->vx_vni = md->vni; @@ -1784,10 +1784,6 @@ int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, } } - skb = iptunnel_handle_offloads(skb, udp_sum, type); - if (IS_ERR(skb)) - return PTR_ERR(skb); - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + VXLAN_HLEN + sizeof(struct iphdr) + (skb_vlan_tag_present(skb) ? 
VLAN_HLEN : 0); @@ -1803,6 +1799,10 @@ int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, if (WARN_ON(!skb)) return -ENOMEM; + skb = iptunnel_handle_offloads(skb, udp_sum, type); + if (IS_ERR(skb)) + return PTR_ERR(skb); + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_HF_VNI); vxh->vx_vni = md->vni; diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index cab606617522..ae2982c0f7a6 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -11,6 +11,7 @@ #define PHY_ID_BCM5421 0x002060e0 #define PHY_ID_BCM5464 0x002060b0 #define PHY_ID_BCM5461 0x002060c0 +#define PHY_ID_BCM54616S 0x03625d10 #define PHY_ID_BCM57780 0x03625d90 #define PHY_ID_BCM7250 0xae025280 diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 263710259774..af150b43b214 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -886,12 +886,12 @@ EXPORT_SYMBOL(gue_build_header); #ifdef CONFIG_NET_FOU_IP_TUNNELS -static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = { +static const struct ip_tunnel_encap_ops fou_iptun_ops = { .encap_hlen = fou_encap_hlen, .build_header = fou_build_header, }; -static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = { +static const struct ip_tunnel_encap_ops gue_iptun_ops = { .encap_hlen = gue_encap_hlen, .build_header = gue_build_header, }; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index b77f5e84c623..8986e63f3bda 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -113,10 +113,6 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, int min_headroom; int err; - skb = udp_tunnel_handle_offloads(skb, csum); - if (IS_ERR(skb)) - return PTR_ERR(skb); - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) + (skb_vlan_tag_present(skb) ? 
VLAN_HLEN : 0); @@ -131,6 +127,10 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, if (unlikely(!skb)) return -ENOMEM; + skb = udp_tunnel_handle_offloads(skb, csum); + if (IS_ERR(skb)) + return PTR_ERR(skb); + gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e662d85d1635..8c8d7e06b72f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2994,6 +2994,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, rcu_read_unlock(); #endif + /* Do not fool tcpdump (if any), clean our debris */ + skb->tstamp.tv64 = 0; return skb; } EXPORT_SYMBOL(tcp_make_synack); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index b53148444e15..ed9d681207fa 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -288,8 +288,7 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, static void vti6_dev_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net *net = dev_net(dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n = net_generic(t->net, vti6_net_id); if (dev == ip6n->fb_tnl_dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); diff --git a/net/rds/connection.c b/net/rds/connection.c index 378c3a6acf84..14f041398ca1 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -130,7 +130,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, rcu_read_lock(); conn = rds_conn_lookup(head, laddr, faddr, trans); if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && - !is_outgoing) { + laddr == faddr && !is_outgoing) { /* This is a looped back IB connection, and we're * called by the code handling the incoming connect. 
* We need a second connection object into which we @@ -193,6 +193,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, } atomic_set(&conn->c_state, RDS_CONN_DOWN); + conn->c_send_gen = 0; conn->c_reconnect_jiffies = 0; INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker); INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker); diff --git a/net/rds/rds.h b/net/rds/rds.h index c3f2855c3d84..0d41155a2258 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -110,6 +110,7 @@ struct rds_connection { void *c_transport_data; atomic_t c_state; + unsigned long c_send_gen; unsigned long c_flags; unsigned long c_reconnect_jiffies; struct delayed_work c_send_w; diff --git a/net/rds/send.c b/net/rds/send.c index 44672befc0ee..e9430f537f9c 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -140,8 +140,11 @@ int rds_send_xmit(struct rds_connection *conn) struct scatterlist *sg; int ret = 0; LIST_HEAD(to_be_dropped); + int batch_count; + unsigned long send_gen = 0; restart: + batch_count = 0; /* * sendmsg calls here after having queued its message on the send @@ -156,6 +159,17 @@ restart: goto out; } + /* + * we record the send generation after doing the xmit acquire. + * if someone else manages to jump in and do some work, we'll use + * this to avoid a goto restart farther down. + * + * The acquire_in_xmit() check above ensures that only one + * caller can increment c_send_gen at any time. + */ + conn->c_send_gen++; + send_gen = conn->c_send_gen; + /* * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT, * we do the opposite to avoid races. @@ -202,6 +216,16 @@ restart: if (!rm) { unsigned int len; + batch_count++; + + /* we want to process as big a batch as we can, but + * we also want to avoid softlockups. 
If we've been + * through a lot of messages, let's back off and see + * if anyone else jumps in + */ + if (batch_count >= 1024) + goto over_batch; + spin_lock_irqsave(&conn->c_lock, flags); if (!list_empty(&conn->c_send_queue)) { @@ -357,9 +381,9 @@ restart: } } +over_batch: if (conn->c_trans->xmit_complete) conn->c_trans->xmit_complete(conn); - release_in_xmit(conn); /* Nuke any messages we decided not to retransmit. */ @@ -380,10 +404,15 @@ restart: * If the transport cannot continue (i.e ret != 0), then it must * call us when more room is available, such as from the tx * completion handler. + * + * We have an extra generation check here so that if someone manages + * to jump in after our release_in_xmit, we'll see that they have done + * some work and we will skip our goto */ if (ret == 0) { smp_mb(); - if (!list_empty(&conn->c_send_queue)) { + if (!list_empty(&conn->c_send_queue) && + send_gen == conn->c_send_gen) { rds_stats_inc(s_send_lock_queue_raced); goto restart; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 179f1c8c0d8b..956ead2cab9a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -560,8 +560,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) tfifo_dequeue: skb = __skb_dequeue(&sch->q); if (skb) { -deliver: qdisc_qstats_backlog_dec(sch, skb); +deliver: qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); return skb; @@ -578,6 +578,7 @@ deliver: rb_erase(p, &q->t_root); sch->q.qlen--; + qdisc_qstats_backlog_dec(sch, skb); skb->next = NULL; skb->prev = NULL; skb->tstamp = netem_skb_cb(skb)->tstamp_save; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 85d1d4764612..526c4feb3b50 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -238,11 +238,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; - if (xfrm_tunnel_check(skb, x, family)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); - goto drop; - } - 
spin_lock(&x->lock); if (unlikely(x->km.state == XFRM_STATE_ACQ)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); @@ -271,6 +266,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) spin_unlock(&x->lock); + if (xfrm_tunnel_check(skb, x, family)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); + goto drop; + } + seq_hi = htonl(xfrm_replay_seqhi(x, seq)); XFRM_SKB_CB(skb)->seq.input.low = seq;