From 8f22ba61b5d730a870cd6b10d299d23280d060fa Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 26 Jan 2014 11:39:26 +0000 Subject: [PATCH 01/32] RxRPC: do not unlock unheld spinlock in rxrpc_connect_exclusive() If rx->conn is not NULL, rxrpc_connect_exclusive() does not acquire the transport's client lock, but it still releases it. The patch adds locking of the spinlock to this path. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David Howells --- net/rxrpc/ar-connection.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 4106ca95ec86..7bf5b5b9e8b9 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -381,6 +381,8 @@ static int rxrpc_connect_exclusive(struct rxrpc_sock *rx, rxrpc_assign_connection_id(conn); rx->conn = conn; + } else { + spin_lock(&trans->client_lock); } /* we've got a connection with a free channel and we can now attach the From 24a9981ee9c7266fa171bbece62062bf78c4d246 Mon Sep 17 00:00:00 2001 From: Tim Smith Date: Sun, 26 Jan 2014 11:39:28 +0000 Subject: [PATCH 02/32] af_rxrpc: Avoid setting up double-free on checksum error skb_kill_datagram() does not dequeue the skb when MSG_PEEK is unset. This leaves a free'd skb on the queue, resulting a double-free later. Without this, the following oops can occur: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] skb_dequeue+0x47/0x70 PGD 0 Oops: 0002 [#1] SMP Modules linked in: af_rxrpc ... CPU: 0 PID: 1191 Comm: listen Not tainted 3.12.0+ #4 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8801183536b0 ti: ffff880035c92000 task.ti: ffff880035c92000 RIP: 0010:[] skb_dequeue+0x47/0x70 RSP: 0018:ffff880035c93db8 EFLAGS: 00010097 RAX: 0000000000000246 RBX: ffff8800d2754b00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000202 RDI: ffff8800d254c084 RBP: ffff880035c93dd0 R08: ffff880035c93cf0 R09: ffff8800d968f270 R10: 0000000000000000 R11: 0000000000000293 R12: ffff8800d254c070 R13: ffff8800d254c084 R14: ffff8800cd861240 R15: ffff880119b39720 FS: 00007f37a969d740(0000) GS:ffff88011fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000008 CR3: 00000000d4413000 CR4: 00000000000006f0 Stack: ffff8800d254c000 ffff8800d254c070 ffff8800d254c2c0 ffff880035c93df8 ffffffffa041a5b8 ffff8800cd844c80 ffffffffa04385a0 ffff8800cd844cb0 ffff880035c93e18 ffffffff81546cef ffff8800d45fea00 0000000000000008 Call Trace: [] rxrpc_release+0x128/0x2e0 [af_rxrpc] [] sock_release+0x1f/0x80 [] sock_close+0x12/0x20 [] __fput+0xe1/0x230 [] ____fput+0xe/0x10 [] task_work_run+0xbc/0xe0 [] do_exit+0x2be/0xa10 [] ? do_munmap+0x297/0x3b0 [] do_group_exit+0x3f/0xa0 [] SyS_exit_group+0x14/0x20 [] system_call_fastpath+0x16/0x1b Signed-off-by: Tim Smith Signed-off-by: David Howells --- net/rxrpc/ar-recvmsg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 898492a8d61b..64cba2e35156 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -353,6 +353,10 @@ csum_copy_error: if (continue_call) rxrpc_put_call(continue_call); rxrpc_kill_skb(skb); + if (!(flags & MSG_PEEK)) { + if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) + BUG(); + } skb_kill_datagram(&rx->sk, skb, flags); rxrpc_put_call(call); return -EAGAIN; From 1ea427359dde1573815e19c411ce08fdf0c42cfe Mon Sep 17 00:00:00 2001 From: Tim Smith Date: Sun, 26 Jan 2014 11:39:31 +0000 Subject: [PATCH 03/32] af_rxrpc: Handle frames delivered from another VM On input, CHECKSUM_PARTIAL should be treated the same way as CHECKSUM_UNNECESSARY. See include/linux/skbuff.h Signed-off-by: Tim Smith Signed-off-by: David Howells --- net/rxrpc/ar-recvmsg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 64cba2e35156..34b5490dde65 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -180,7 +180,8 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, if (copy > len - copied) copy = len - copied; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->ip_summed == CHECKSUM_UNNECESSARY || + skb->ip_summed == CHECKSUM_PARTIAL) { ret = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copy); } else { From de960aa9ab4decc3304959f69533eef64d05d8e8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 26 Jan 2014 10:58:16 +0100 Subject: [PATCH 04/32] net: add and use skb_gso_transport_seglen() This moves part of Eric Dumazets skb_gso_seglen helper from tbf sched to skbuff core so it may be reused by upcoming ip forwarding path patch. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 25 +++++++++++++++++++++++++ net/sched/sch_tbf.c | 13 +++---------- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1f689e62e4cb..f589c9af8cbf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2456,6 +2456,7 @@ void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); +unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct skb_checksum_ops { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8f519dbb358b..9ae6d11374d1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -47,6 +47,8 @@ #include #include #include +#include +#include #include #ifdef CONFIG_NET_CLS_ACT #include @@ -3916,3 +3918,26 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) nf_reset_trace(skb); } EXPORT_SYMBOL_GPL(skb_scrub_packet); + +/** + * skb_gso_transport_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_transport_seglen is used to determine the real size of the + * individual segments, including Layer4 headers (TCP/UDP). + * + * The MAC/L2 or network (IP, IPv6) headers are not accounted for. + */ +unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int hdr_len; + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len = tcp_hdrlen(skb); + else + hdr_len = sizeof(struct udphdr); + return hdr_len + shinfo->gso_size; +} +EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index fbba5b0ec121..1cb413fead89 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -21,7 +21,6 @@ #include #include #include -#include /* Simple Token Bucket Filter. @@ -148,16 +147,10 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r, * Return length of individual segments of a gso packet, * including all headers (MAC, IP, TCP/UDP) */ -static unsigned int skb_gso_seglen(const struct sk_buff *skb) +static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) { unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - const struct skb_shared_info *shinfo = skb_shinfo(skb); - - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len += tcp_hdrlen(skb); - else - hdr_len += sizeof(struct udphdr); - return hdr_len + shinfo->gso_size; + return hdr_len + skb_gso_transport_seglen(skb); } /* GSO packet is too big, segment it so that tbf can transmit @@ -202,7 +195,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; if (qdisc_pkt_len(skb) > q->max_size) { - if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size) + if (skb_is_gso(skb) && skb_gso_mac_seglen(skb) <= q->max_size) return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); } From b565c9f7458f44f1b2369d7a7ab9346d55546161 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 26 Jan 2014 11:44:23 +0100 Subject: [PATCH 05/32] net/apne: Remove unused variable ei_local MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/ethernet/8390/apne.c: In function ‘apne_probe1’: drivers/net/ethernet/8390/apne.c:215: warning: unused variable ‘ei_local’ Introduced by commit c45f812f0280c13f1b7992be5e0de512312a9e8f ("8390 : Replace ei_debug with msg_enable/NETIF_MSG_* feature"), which added the variable without using it. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/apne.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c index 811fa5d5c697..30104b60da85 100644 --- a/drivers/net/ethernet/8390/apne.c +++ b/drivers/net/ethernet/8390/apne.c @@ -212,7 +212,6 @@ static int __init apne_probe1(struct net_device *dev, int ioaddr) int neX000, ctron; #endif static unsigned version_printed; - struct ei_device *ei_local = netdev_priv(dev); if ((apne_msg_enable & NETIF_MSG_DRV) && (version_printed++ == 0)) netdev_info(dev, version); From 7509edd6e95e5bef2d60b1ca0adc9859b8775694 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 26 Jan 2014 15:50:43 +0100 Subject: [PATCH 06/32] net: stmmac: Silence PTP init errors on hw without PTP Logging a PTP error on hw which simply does not support PTP is not very useful. Moreover this message gets logged on every if-up, and if there is no cable inserted NetworkManager will re-try the ifup every 50 seconds. Signed-off-by: Hans de Goede Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d93aa87408c2..a395bb24cdf2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1635,7 +1635,7 @@ static int stmmac_hw_setup(struct net_device *dev) stmmac_mmc_setup(priv); ret = stmmac_init_ptp(priv); - if (ret) + if (ret && ret != -EOPNOTSUPP) pr_warn("%s: failed PTP initialisation\n", __func__); #ifdef CONFIG_STMMAC_DEBUG_FS From c88460b778089525fbcf60b839b8db714406d119 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 26 Jan 2014 15:50:44 +0100 Subject: [PATCH 07/32] net: stmmac: Log MAC address only once Logging the MAC address on every if-up, is not really useful, and annoying when there is no cable inserted and NetworkManager tries the ifup every 50 seconds. Also change the log level from warning to info, as that is what it is. Signed-off-by: Hans de Goede Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index a395bb24cdf2..a2e7d2c96e36 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1524,9 +1524,9 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv) priv->dev->dev_addr, 0); if (!is_valid_ether_addr(priv->dev->dev_addr)) eth_hw_addr_random(priv->dev); + pr_info("%s: device MAC address %pM\n", priv->dev->name, + priv->dev->dev_addr); } - pr_warn("%s: device MAC address %pM\n", priv->dev->name, - priv->dev->dev_addr); } /** From 27d79f3b1071b2a2d58443a130e92c381c838e5d Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 27 Jan 2014 12:13:57 +0530 Subject: [PATCH 08/32] net: ipv4: Use PTR_ERR_OR_ZERO PTR_RET is deprecated. Use PTR_ERR_OR_ZERO instead. While at it also include missing err.h header. Signed-off-by: Sachin Kamat Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index c0e3cb72ad70..bd28f386bd02 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -930,7 +931,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, } rtnl_unlock(); - return PTR_RET(itn->fb_tunnel_dev); + return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev); } EXPORT_SYMBOL_GPL(ip_tunnel_init_net); From 5f6db130b5dab96eb1fe6b2c1d1131a74149f949 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 27 Jan 2014 17:11:58 +0200 Subject: [PATCH 09/32] bnx2x: More Shutdown revisions Submission d9aee59 "bnx2x: Don't release PCI bars on shutdown" separated the PCI remove and shutdown flows, but pci_disable_device() is still being called on both. As a result, a dev_WARN_ONCE will be hit during shutdown for every bnx2x VF probed on a hypervisor (as its shutdown callback will be called and later pci_disable_sriov() will call its remove callback). This calls the pci_disable_device() only on the remove flow. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index e118a3ec62bc..c9c445e7b4a5 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13102,9 +13102,9 @@ static void __bnx2x_remove(struct pci_dev *pdev, if (atomic_read(&pdev->enable_cnt) == 1) pci_release_regions(pdev); - } - pci_disable_device(pdev); + pci_disable_device(pdev); + } } static void bnx2x_remove_one(struct pci_dev *pdev) From bb9fbe2ddd6d6111c5fe5987fa1e71da7d2ab854 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 27 Jan 2014 11:43:04 -0500 Subject: [PATCH 10/32] AF_PACKET: Add documentation for queue mapping fanout mode Recently I added a new AF_PACKET fanout operation mode in commit 2d36097, but I forgot to document it. Add PACKET_FANOUT_QM as an available mode in the af_packet documentation. Applies to net-next. Signed-off-by: Neil Horman CC: "David S. Miller" CC: Daniel Borkmann Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- Documentation/networking/packet_mmap.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 91ffe1d9e8ca..1404674c0a02 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -583,6 +583,7 @@ Currently implemented fanout policies are: - PACKET_FANOUT_CPU: schedule to socket by CPU packet arrives on - PACKET_FANOUT_RND: schedule to socket by random selection - PACKET_FANOUT_ROLLOVER: if one socket is full, rollover to another + - PACKET_FANOUT_QM: schedule to socket by skbs recorded queue_mapping Minimal example code by David S. Miller (try things like "./test eth0 hash", "./test eth0 lb", etc.): From 98b90f26651f9d84cfbb0221c9a3d9863c5bea69 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Mon, 27 Jan 2014 14:37:31 +0100 Subject: [PATCH 11/32] bonding: RCUify bond_ab_arp_probe Currently bond_ab_arp_probe() is always called under rcu_read_lock(), however to work with curr_active_slave we're still holding the curr_slave_lock. To remove that curr_slave_lock - rcu_dereference the bond's curr_active_slave and use it further - so that we're sure the slave won't go away, and we don't care if it will change in the meanwhile. CC: Jay Vosburgh CC: Andy Gospodarek Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a7db819bca92..27e6fddb2206 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2605,25 +2605,21 @@ do_failover: static void bond_ab_arp_probe(struct bonding *bond) { struct slave *slave, *before = NULL, *new_slave = NULL, - *curr_arp_slave = rcu_dereference(bond->current_arp_slave); + *curr_arp_slave = rcu_dereference(bond->current_arp_slave), + *curr_active_slave = rcu_dereference(bond->curr_active_slave); struct list_head *iter; bool found = false; - read_lock(&bond->curr_slave_lock); - - if (curr_arp_slave && bond->curr_active_slave) + if (curr_arp_slave && curr_active_slave) pr_info("PROBE: c_arp %s && cas %s BAD\n", curr_arp_slave->dev->name, - bond->curr_active_slave->dev->name); + curr_active_slave->dev->name); - if (bond->curr_active_slave) { - bond_arp_send_all(bond, bond->curr_active_slave); - read_unlock(&bond->curr_slave_lock); + if (curr_active_slave) { + bond_arp_send_all(bond, curr_active_slave); return; } - read_unlock(&bond->curr_slave_lock); - /* if we don't have a curr_active_slave, search for the next available * backup slave from the current_arp_slave and make it the candidate * for becoming the curr_active_slave From f2ebd477f141bc09b10fb8deb612a4d9b8999bba Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Mon, 27 Jan 2014 14:37:32 +0100 Subject: [PATCH 12/32] bonding: restructure locking of bond_ab_arp_probe() Currently we're calling it from under RCU context, however we're using some functions that require rtnl to be held. Fix this by restructuring the locking - don't call it under any locks, aquire rcu_read_lock() if we're sending _only_ (i.e. we have the active slave present), and use rtnl locking otherwise - if we need to modify (in)active flags of a slave. CC: Jay Vosburgh CC: Andy Gospodarek Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 57 +++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 27e6fddb2206..dd75615d85f2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2599,17 +2599,18 @@ do_failover: /* * Send ARP probes for active-backup mode ARP monitor. - * - * Called with rcu_read_lock hold. */ -static void bond_ab_arp_probe(struct bonding *bond) +static bool bond_ab_arp_probe(struct bonding *bond) { struct slave *slave, *before = NULL, *new_slave = NULL, - *curr_arp_slave = rcu_dereference(bond->current_arp_slave), - *curr_active_slave = rcu_dereference(bond->curr_active_slave); + *curr_arp_slave, *curr_active_slave; struct list_head *iter; bool found = false; + rcu_read_lock(); + curr_arp_slave = rcu_dereference(bond->current_arp_slave); + curr_active_slave = rcu_dereference(bond->curr_active_slave); + if (curr_arp_slave && curr_active_slave) pr_info("PROBE: c_arp %s && cas %s BAD\n", curr_arp_slave->dev->name, @@ -2617,23 +2618,32 @@ static void bond_ab_arp_probe(struct bonding *bond) if (curr_active_slave) { bond_arp_send_all(bond, curr_active_slave); - return; + rcu_read_unlock(); + return true; } + rcu_read_unlock(); /* if we don't have a curr_active_slave, search for the next available * backup slave from the current_arp_slave and make it the candidate * for becoming the curr_active_slave */ + if (!rtnl_trylock()) + return false; + /* curr_arp_slave might have gone away */ + curr_arp_slave = ACCESS_ONCE(bond->current_arp_slave); + if (!curr_arp_slave) { - curr_arp_slave = bond_first_slave_rcu(bond); - if (!curr_arp_slave) - return; + curr_arp_slave = bond_first_slave(bond); + if (!curr_arp_slave) { + rtnl_unlock(); + return true; + } } bond_set_slave_inactive_flags(curr_arp_slave); - bond_for_each_slave_rcu(bond, slave, iter) { + bond_for_each_slave(bond, slave, iter) { if (!found && !before && IS_UP(slave->dev)) before = slave; @@ -2663,21 +2673,26 @@ static void bond_ab_arp_probe(struct bonding *bond) if (!new_slave && before) new_slave = before; - if (!new_slave) - return; + if (!new_slave) { + rtnl_unlock(); + return true; + } new_slave->link = BOND_LINK_BACK; bond_set_slave_active_flags(new_slave); bond_arp_send_all(bond, new_slave); new_slave->jiffies = jiffies; rcu_assign_pointer(bond->current_arp_slave, new_slave); + rtnl_unlock(); + + return true; } static void bond_activebackup_arp_mon(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, arp_work.work); - bool should_notify_peers = false; + bool should_notify_peers = false, should_commit = false; int delta_in_ticks; delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); @@ -2686,12 +2701,11 @@ static void bond_activebackup_arp_mon(struct work_struct *work) goto re_arm; rcu_read_lock(); - should_notify_peers = bond_should_notify_peers(bond); + should_commit = bond_ab_arp_inspect(bond); + rcu_read_unlock(); - if (bond_ab_arp_inspect(bond)) { - rcu_read_unlock(); - + if (should_commit) { /* Race avoidance with bond_close flush of workqueue */ if (!rtnl_trylock()) { delta_in_ticks = 1; @@ -2700,13 +2714,14 @@ static void bond_activebackup_arp_mon(struct work_struct *work) } bond_ab_arp_commit(bond); - rtnl_unlock(); - rcu_read_lock(); } - bond_ab_arp_probe(bond); - rcu_read_unlock(); + if (!bond_ab_arp_probe(bond)) { + /* rtnl locking failed, re-arm */ + delta_in_ticks = 1; + should_notify_peers = false; + } re_arm: if (bond->params.arp_interval) From a452ce345d63ddf92cd101e4196569f8718ad319 Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Mon, 27 Jan 2014 10:33:18 +0100 Subject: [PATCH 13/32] net: Fix memory leak if TPROXY used with TCP early demux I see a memory leak when using a transparent HTTP proxy using TPROXY together with TCP early demux and Kernel v3.8.13.15 (Ubuntu stable): unreferenced object 0xffff88008cba4a40 (size 1696): comm "softirq", pid 0, jiffies 4294944115 (age 8907.520s) hex dump (first 32 bytes): 0a e0 20 6a 40 04 1b 37 92 be 32 e2 e8 b4 00 00 .. j@..7..2..... 02 00 07 01 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmem_cache_alloc+0xad/0xb9 [] sk_prot_alloc+0x29/0xc5 [] sk_clone_lock+0x14/0x283 [] inet_csk_clone_lock+0xf/0x7b [] netlink_broadcast+0x14/0x16 [] tcp_create_openreq_child+0x1b/0x4c3 [] tcp_v4_syn_recv_sock+0x38/0x25d [] tcp_check_req+0x25c/0x3d0 [] tcp_v4_do_rcv+0x287/0x40e [] ip_route_input_noref+0x843/0xa55 [] tcp_v4_rcv+0x4c9/0x725 [] ip_local_deliver_finish+0xe9/0x154 [] __netif_receive_skb+0x4b2/0x514 [] process_backlog+0xee/0x1c5 [] net_rx_action+0xa7/0x200 [] add_interrupt_randomness+0x39/0x157 But there are many more, resulting in the machine going OOM after some days. From looking at the TPROXY code, and with help from Florian, I see that the memory leak is introduced in tcp_v4_early_demux(): void tcp_v4_early_demux(struct sk_buff *skb) { /* ... */ iph = ip_hdr(skb); th = tcp_hdr(skb); if (th->doff < sizeof(struct tcphdr) / 4) return; sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, ntohs(th->dest), skb->skb_iif); if (sk) { skb->sk = sk; where the socket is assigned unconditionally to skb->sk, also bumping the refcnt on it. This is problematic, because in our case the skb has already a socket assigned in the TPROXY target. This then results in the leak I see. The very same issue seems to be with IPv6, but haven't tested. Reviewed-by: Florian Westphal Signed-off-by: Holger Eitzenberger Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 2 +- net/ipv6/ip6_input.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 054a3e97d822..3d4da2c16b6a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -314,7 +314,7 @@ static int ip_rcv_finish(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; - if (sysctl_ip_early_demux && !skb_dst(skb)) { + if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct net_protocol *ipprot; int protocol = iph->protocol; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 302d6fb1ff2b..51d54dc376f3 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,7 +49,7 @@ int ip6_rcv_finish(struct sk_buff *skb) { - if (sysctl_ip_early_demux && !skb_dst(skb)) { + if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); From 731073b9c99d46c6b6c01184f67ee6f75fd7a163 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Sat, 25 Jan 2014 11:34:54 +0100 Subject: [PATCH 14/32] sky2: initialize napi before registering device There is race condition when call netif_napi_add() after register_netdevice(), as ->open() can be called without napi initialized and trigger BUG_ON() on napi_enable(), like on below messages: [ 9.699863] sky2: driver version 1.30 [ 9.699960] sky2 0000:02:00.0: Yukon-2 EC Ultra chip revision 2 [ 9.700020] sky2 0000:02:00.0: irq 45 for MSI/MSI-X [ 9.700498] ------------[ cut here ]------------ [ 9.703391] kernel BUG at include/linux/netdevice.h:501! [ 9.703391] invalid opcode: 0000 [#1] PREEMPT SMP [ 9.830018] Call Trace: [ 9.830018] [] sky2_open+0x309/0x360 [sky2] [ 9.830018] [] ? via_no_dac+0x40/0x40 [ 9.830018] [] ? via_no_dac+0x40/0x40 [ 9.830018] [] __dev_open+0x9b/0x120 [ 9.830018] [] ? _raw_spin_unlock_bh+0x1e/0x20 [ 9.830018] [] __dev_change_flags+0x89/0x150 [ 9.830018] [] dev_change_flags+0x18/0x50 [ 9.830018] [] devinet_ioctl+0x5d0/0x6e0 [ 9.830018] [] inet_ioctl+0x6d/0xa0 To fix the problem patch changes the order of initialization. Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=67151 Reported-and-tested-by: ebrahim.azarisooreh@gmail.com Signed-off-by: Stanislaw Gruszka Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 6509935d145e..55a37ae11440 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -5020,6 +5020,8 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } + netif_napi_add(dev, &hw->napi, sky2_poll, NAPI_WEIGHT); + err = register_netdev(dev); if (err) { dev_err(&pdev->dev, "cannot register net device\n"); @@ -5028,8 +5030,6 @@ static int sky2_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netif_carrier_off(dev); - netif_napi_add(dev, &hw->napi, sky2_poll, NAPI_WEIGHT); - sky2_show_addr(dev); if (hw->ports > 1) { From b679ef73edc251f6d200a7dd2396e9fef9e36fc3 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 27 Jan 2014 15:03:42 -0800 Subject: [PATCH 15/32] hyperv: Add support for physically discontinuous receive buffer This will allow us to use bigger receive buffer, and prevent allocation failure due to fragmented memory. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/hv/channel.c | 14 ++++++++------ drivers/net/hyperv/hyperv_net.h | 2 +- drivers/net/hyperv/netvsc.c | 7 ++----- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index cea623c36ae2..69ea36f07b4d 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -209,7 +209,6 @@ static int create_gpadl_header(void *kbuffer, u32 size, { int i; int pagecount; - unsigned long long pfn; struct vmbus_channel_gpadl_header *gpadl_header; struct vmbus_channel_gpadl_body *gpadl_body; struct vmbus_channel_msginfo *msgheader; @@ -219,7 +218,6 @@ static int create_gpadl_header(void *kbuffer, u32 size, int pfnsum, pfncount, pfnleft, pfncurr, pfnsize; pagecount = size >> PAGE_SHIFT; - pfn = virt_to_phys(kbuffer) >> PAGE_SHIFT; /* do we need a gpadl body msg */ pfnsize = MAX_SIZE_CHANNEL_MESSAGE - @@ -248,7 +246,8 @@ static int create_gpadl_header(void *kbuffer, u32 size, gpadl_header->range[0].byte_offset = 0; gpadl_header->range[0].byte_count = size; for (i = 0; i < pfncount; i++) - gpadl_header->range[0].pfn_array[i] = pfn+i; + gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys( + kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT; *msginfo = msgheader; *messagecount = 1; @@ -301,7 +300,9 @@ static int create_gpadl_header(void *kbuffer, u32 size, * so the hypervisor gurantees that this is ok. */ for (i = 0; i < pfncurr; i++) - gpadl_body->pfn[i] = pfn + pfnsum + i; + gpadl_body->pfn[i] = slow_virt_to_phys( + kbuffer + PAGE_SIZE * (pfnsum + i)) >> + PAGE_SHIFT; /* add to msg header */ list_add_tail(&msgbody->msglistentry, @@ -327,7 +328,8 @@ static int create_gpadl_header(void *kbuffer, u32 size, gpadl_header->range[0].byte_offset = 0; gpadl_header->range[0].byte_count = size; for (i = 0; i < pagecount; i++) - gpadl_header->range[0].pfn_array[i] = pfn+i; + gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys( + kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT; *msginfo = msgheader; *messagecount = 1; @@ -344,7 +346,7 @@ nomem: * vmbus_establish_gpadl - Estabish a GPADL for the specified buffer * * @channel: a channel - * @kbuffer: from kmalloc + * @kbuffer: from kmalloc or vmalloc * @size: page-size multiple * @gpadl_handle: some funky thing */ diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index a26eecb1212c..7b594ce3f21d 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -462,7 +462,7 @@ struct nvsp_message { #define NETVSC_MTU 65536 -#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*2) /* 2MB */ +#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 93b485b96249..03a2c6e17158 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -136,8 +136,7 @@ static int netvsc_destroy_recv_buf(struct netvsc_device *net_device) if (net_device->recv_buf) { /* Free up the receive buffer */ - free_pages((unsigned long)net_device->recv_buf, - get_order(net_device->recv_buf_size)); + vfree(net_device->recv_buf); net_device->recv_buf = NULL; } @@ -163,9 +162,7 @@ static int netvsc_init_recv_buf(struct hv_device *device) return -ENODEV; ndev = net_device->ndev; - net_device->recv_buf = - (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, - get_order(net_device->recv_buf_size)); + net_device->recv_buf = vzalloc(net_device->recv_buf_size); if (!net_device->recv_buf) { netdev_err(ndev, "unable to allocate receive " "buffer of size %d\n", net_device->recv_buf_size); From ce60e0c4df5f95086d5c2662c5cfa0beb8181c6d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 7 Jan 2014 12:52:43 +1100 Subject: [PATCH 16/32] net: 6lowpan: fixup for code movement Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller --- net/ieee802154/6lowpan_iphc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ieee802154/6lowpan_iphc.c b/net/ieee802154/6lowpan_iphc.c index 083f905bf109..860aa2d445ba 100644 --- a/net/ieee802154/6lowpan_iphc.c +++ b/net/ieee802154/6lowpan_iphc.c @@ -678,7 +678,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, hc06_ptr += 3; } else { /* compress nothing */ - memcpy(hc06_ptr, &hdr, 4); + memcpy(hc06_ptr, hdr, 4); /* replace the top byte with new ECN | DSCP format */ *hc06_ptr = tmp; hc06_ptr += 4; From cefe0078eea52af17411eb1248946a94afb84ca5 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Tue, 28 Jan 2014 11:35:42 +0800 Subject: [PATCH 17/32] xen-netfront: fix resource leak in netfront This patch removes grant transfer releasing code from netfront, and uses gnttab_end_foreign_access to end grant access since gnttab_end_foreign_access_ref may fail when the grant entry is currently used for reading or writing. * clean up grant transfer code kept from old netfront(2.6.18) which grants pages for access/map and transfer. But grant transfer is deprecated in current netfront, so remove corresponding release code for transfer. * fix resource leak, release grant access (through gnttab_end_foreign_access) and skb for tx/rx path, use get_page to ensure page is released when grant access is completed successfully. Xen-blkfront/xen-tpmfront/xen-pcifront also have similar issue, but patches for them will be created separately. V6: Correct subject line and commit message. V5: Remove unecessary change in xennet_end_access. V4: Revert put_page in gnttab_end_foreign_access, and keep netfront change in single patch. V3: Changes as suggestion from David Vrabel, ensure pages are not freed untill grant acess is ended. V2: Improve patch comments. Signed-off-by: Annie Li Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 94 ++++++++++++-------------------------- 1 file changed, 29 insertions(+), 65 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e955c5692986..ff04d4f95baa 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -117,6 +117,7 @@ struct netfront_info { } tx_skbs[NET_TX_RING_SIZE]; grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; + struct page *grant_tx_page[NET_TX_RING_SIZE]; unsigned tx_skb_freelist; spinlock_t rx_lock ____cacheline_aligned_in_smp; @@ -396,6 +397,7 @@ static void xennet_tx_buf_gc(struct net_device *dev) gnttab_release_grant_reference( &np->gref_tx_head, np->grant_tx_ref[id]); np->grant_tx_ref[id] = GRANT_INVALID_REF; + np->grant_tx_page[id] = NULL; add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id); dev_kfree_skb_irq(skb); } @@ -452,6 +454,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); + np->grant_tx_page[id] = virt_to_page(data); tx->gref = np->grant_tx_ref[id] = ref; tx->offset = offset; tx->size = len; @@ -497,6 +500,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, np->xbdev->otherend_id, mfn, GNTMAP_readonly); + np->grant_tx_page[id] = page; tx->gref = np->grant_tx_ref[id] = ref; tx->offset = offset; tx->size = bytes; @@ -596,6 +600,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) mfn = virt_to_mfn(data); gnttab_grant_foreign_access_ref( ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); + np->grant_tx_page[id] = virt_to_page(data); tx->gref = np->grant_tx_ref[id] = ref; tx->offset = offset; tx->size = len; @@ -1085,10 +1090,11 @@ static void xennet_release_tx_bufs(struct netfront_info *np) continue; skb = np->tx_skbs[i].skb; - gnttab_end_foreign_access_ref(np->grant_tx_ref[i], - GNTMAP_readonly); - gnttab_release_grant_reference(&np->gref_tx_head, - np->grant_tx_ref[i]); + get_page(np->grant_tx_page[i]); + gnttab_end_foreign_access(np->grant_tx_ref[i], + GNTMAP_readonly, + (unsigned long)page_address(np->grant_tx_page[i])); + np->grant_tx_page[i] = NULL; np->grant_tx_ref[i] = GRANT_INVALID_REF; add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i); dev_kfree_skb_irq(skb); @@ -1097,78 +1103,35 @@ static void xennet_release_tx_bufs(struct netfront_info *np) static void xennet_release_rx_bufs(struct netfront_info *np) { - struct mmu_update *mmu = np->rx_mmu; - struct multicall_entry *mcl = np->rx_mcl; - struct sk_buff_head free_list; - struct sk_buff *skb; - unsigned long mfn; - int xfer = 0, noxfer = 0, unused = 0; int id, ref; - dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n", - __func__); - return; - - skb_queue_head_init(&free_list); - spin_lock_bh(&np->rx_lock); for (id = 0; id < NET_RX_RING_SIZE; id++) { - ref = np->grant_rx_ref[id]; - if (ref == GRANT_INVALID_REF) { - unused++; - continue; - } + struct sk_buff *skb; + struct page *page; skb = np->rx_skbs[id]; - mfn = gnttab_end_foreign_transfer_ref(ref); - gnttab_release_grant_reference(&np->gref_rx_head, ref); + if (!skb) + continue; + + ref = np->grant_rx_ref[id]; + if (ref == GRANT_INVALID_REF) + continue; + + page = skb_frag_page(&skb_shinfo(skb)->frags[0]); + + /* gnttab_end_foreign_access() needs a page ref until + * foreign access is ended (which may be deferred). + */ + get_page(page); + gnttab_end_foreign_access(ref, 0, + (unsigned long)page_address(page)); np->grant_rx_ref[id] = GRANT_INVALID_REF; - if (0 == mfn) { - skb_shinfo(skb)->nr_frags = 0; - dev_kfree_skb(skb); - noxfer++; - continue; - } - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* Remap the page. */ - const struct page *page = - skb_frag_page(&skb_shinfo(skb)->frags[0]); - unsigned long pfn = page_to_pfn(page); - void *vaddr = page_address(page); - - MULTI_update_va_mapping(mcl, (unsigned long)vaddr, - mfn_pte(mfn, PAGE_KERNEL), - 0); - mcl++; - mmu->ptr = ((u64)mfn << PAGE_SHIFT) - | MMU_MACHPHYS_UPDATE; - mmu->val = pfn; - mmu++; - - set_phys_to_machine(pfn, mfn); - } - __skb_queue_tail(&free_list, skb); - xfer++; + kfree_skb(skb); } - dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n", - __func__, xfer, noxfer, unused); - - if (xfer) { - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* Do all the remapping work and M2P updates. */ - MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu, - NULL, DOMID_SELF); - mcl++; - HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl); - } - } - - __skb_queue_purge(&free_list); - spin_unlock_bh(&np->rx_lock); } @@ -1339,6 +1302,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) for (i = 0; i < NET_RX_RING_SIZE; i++) { np->rx_skbs[i] = NULL; np->grant_rx_ref[i] = GRANT_INVALID_REF; + np->grant_tx_page[i] = NULL; } /* A grant for every tx ring slot */ From 3d9667a9e1e463c107cb47a810ef7e85d9a31e62 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 27 Jan 2014 23:11:09 -0500 Subject: [PATCH 18/32] i40e: Add missing braces to i40e_dcb_need_reconfig() Indentation mismatch spotted with Coverity. Introduced in 4e3b35b044ea ("i40e: add DCB and DCBNL support") Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a4b940862b83..b901371ca361 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4440,9 +4440,10 @@ bool i40e_dcb_need_reconfig(struct i40e_pf *pf, /* Check if APP Table has changed */ if (memcmp(&new_cfg->app, &old_cfg->app, - sizeof(new_cfg->app))) + sizeof(new_cfg->app))) { need_reconfig = true; dev_info(&pf->pdev->dev, "APP Table change detected.\n"); + } return need_reconfig; } From c0c0c50ff7c3e331c90bab316d21f724fb9e1994 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Tue, 28 Jan 2014 11:49:43 +0800 Subject: [PATCH 19/32] net: gre: use icmp_hdr() to get inner ip header When dealing with icmp messages, the skb->data points the ip header that triggered the sending of the icmp message. In gre_cisco_err(), the parse_gre_header() is called, and the iptunnel_pull_header() is called to pull the skb at the end of the parse_gre_header(), so the skb->data doesn't point the inner ip header. Unfortunately, the ipgre_err still needs those ip addresses in inner ip header to look up tunnel by ip_tunnel_lookup(). So just use icmp_hdr() to get inner ip header instead of skb->data. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e7a92fdb36f6..ec4f762efda5 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -178,7 +178,7 @@ static int ipgre_err(struct sk_buff *skb, u32 info, else itn = net_generic(net, ipgre_net_id); - iph = (const struct iphdr *)skb->data; + iph = (const struct iphdr *)(icmp_hdr(skb) + 1); t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, iph->daddr, iph->saddr, tpi->key); From d922e1cb1ea17ac7f0a5c3c2be98d4bd80d055b8 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 28 Jan 2014 15:26:42 +1100 Subject: [PATCH 20/32] net: Document promote_secondaries From 038a821667f62c496f2bbae27081b1b612122a97 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 28 Jan 2014 15:16:49 +1100 Subject: [PATCH] net: Document promote_secondaries This option was added a long time ago... commit 8f937c6099858eee15fae14009dcbd05177fa91d Author: Harald Welte Date: Sun May 29 20:23:46 2005 -0700 [IPV4]: Primary and secondary addresses Signed-off-by: Martin Schwenke Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5de03740cdd5..ab42c95f9985 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1088,6 +1088,12 @@ igmpv3_unsolicited_report_interval - INTEGER IGMPv3 report retransmit will take place. Default: 1000 (1 seconds) +promote_secondaries - BOOLEAN + When a primary IP address is removed from this interface + promote a corresponding secondary IP address instead of + removing all the corresponding secondary IP addresses. + + tag - INTEGER Allows you to write a number, which can be used as required. Default value is 0. From 462bed4870a813bb3ab33254de70757a92d0dc69 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Tue, 28 Jan 2014 11:55:27 -0500 Subject: [PATCH 21/32] qlcnic: Correct off-by-one errors in bounds checks o Bound checks should be >= instead of > for number of receive descriptors and number of receive rings. Signed-off-by: Manish Chopra Signed-off-by: Jitendra Kalsaria Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 30874cda8476..19e1a93bac67 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -1150,13 +1150,13 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter, u16 lro_length, length, data_offset, t_vid, vid = 0xffff; u32 seq_number; - if (unlikely(ring > adapter->max_rds_rings)) + if (unlikely(ring >= adapter->max_rds_rings)) return NULL; rds_ring = &recv_ctx->rds_rings[ring]; index = qlcnic_get_lro_sts_refhandle(sts_data0); - if (unlikely(index > rds_ring->num_desc)) + if (unlikely(index >= rds_ring->num_desc)) return NULL; buffer = &rds_ring->rx_buf_arr[index]; @@ -1662,13 +1662,13 @@ qlcnic_83xx_process_lro(struct qlcnic_adapter *adapter, u16 vid = 0xffff; int err; - if (unlikely(ring > adapter->max_rds_rings)) + if (unlikely(ring >= adapter->max_rds_rings)) return NULL; rds_ring = &recv_ctx->rds_rings[ring]; index = qlcnic_83xx_hndl(sts_data[0]); - if (unlikely(index > rds_ring->num_desc)) + if (unlikely(index >= rds_ring->num_desc)) return NULL; buffer = &rds_ring->rx_buf_arr[index]; From bcf6cb1aa415055749d855eead774896141eb5d8 Mon Sep 17 00:00:00 2001 From: Rajesh Borundia Date: Tue, 28 Jan 2014 11:55:28 -0500 Subject: [PATCH 22/32] qlcnic: Fix initialization of vlan list. o Do not re-initialize vlan list in case of adapter reset. Signed-off-by: Rajesh Borundia Signed-off-by: David S. Miller --- .../net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index 17a1ca2050f4..0638c1810d54 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -448,8 +448,7 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, return 0; } -static int qlcnic_sriov_get_vf_acl(struct qlcnic_adapter *adapter, - struct qlcnic_info *info) +static int qlcnic_sriov_get_vf_acl(struct qlcnic_adapter *adapter) { struct qlcnic_sriov *sriov = adapter->ahw->sriov; struct qlcnic_cmd_args cmd; @@ -495,10 +494,6 @@ static int qlcnic_sriov_vf_init_driver(struct qlcnic_adapter *adapter) if (err) return -EIO; - err = qlcnic_sriov_get_vf_acl(adapter, &nic_info); - if (err) - return err; - if (qlcnic_83xx_get_port_info(adapter)) return -EIO; @@ -555,6 +550,10 @@ static int qlcnic_sriov_setup_vf(struct qlcnic_adapter *adapter, if (err) goto err_out_send_channel_term; + err = qlcnic_sriov_get_vf_acl(adapter); + if (err) + goto err_out_send_channel_term; + err = qlcnic_setup_netdev(adapter, adapter->netdev, pci_using_dac); if (err) goto err_out_send_channel_term; From 060d0564a9152f210183aebad41c41794e18f419 Mon Sep 17 00:00:00 2001 From: Rajesh Borundia Date: Tue, 28 Jan 2014 11:55:29 -0500 Subject: [PATCH 23/32] qlcnic: Fix tx timeout. o __qlcnic_down call's netif_tx_disable which in turn stops all the TX queues, corresponding start queue was missing in __qlcnic_up which was leading to tx timeout. o The commit b84caae486135d588fb200973b0be8cb8a511edf (qlcnic: Fix usage of netif_tx_{wake, stop} api during link change.) exposed this issue. Signed-off-by: Rajesh Borundia Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 1f79d47c45fa..ba78c7481fa3 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -1837,6 +1837,7 @@ int __qlcnic_up(struct qlcnic_adapter *adapter, struct net_device *netdev) qlcnic_linkevent_request(adapter, 1); adapter->ahw->reset_context = 0; + netif_tx_start_all_queues(netdev); return 0; } @@ -2704,14 +2705,8 @@ static int qlcnic_open(struct net_device *netdev) err = __qlcnic_up(adapter, netdev); if (err) - goto err_out; + qlcnic_detach(adapter); - netif_tx_start_all_queues(netdev); - - return 0; - -err_out: - qlcnic_detach(adapter); return err; } From 092dfcf347241576a98d3b1bb4b9b98e0faeb801 Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Tue, 28 Jan 2014 11:55:30 -0500 Subject: [PATCH 24/32] qlcnic: Fix loopback test failure Driver was returning from link event handler without setting linkup variable Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 19e1a93bac67..54ebf300332a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -683,12 +683,17 @@ void qlcnic_advert_link_change(struct qlcnic_adapter *adapter, int linkup) adapter->ahw->linkup = 0; netif_carrier_off(netdev); } else if (!adapter->ahw->linkup && linkup) { - /* Do not advertise Link up if the port is in loopback mode */ - if (qlcnic_83xx_check(adapter) && adapter->ahw->lb_mode) + adapter->ahw->linkup = 1; + + /* Do not advertise Link up to the stack if device + * is in loopback mode + */ + if (qlcnic_83xx_check(adapter) && adapter->ahw->lb_mode) { + netdev_info(netdev, "NIC Link is up for loopback test\n"); return; + } netdev_info(netdev, "NIC Link is up\n"); - adapter->ahw->linkup = 1; netif_carrier_on(netdev); } } From 0f1a24c9a9f4682dd61f5c39b9952f915c5e952c Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 28 Jan 2014 16:30:52 -0500 Subject: [PATCH 25/32] llc: remove noisy WARN from llc_mac_hdr_init Sending malformed llc packets triggers this spew, which seems excessive. WARNING: CPU: 1 PID: 6917 at net/llc/llc_output.c:46 llc_mac_hdr_init+0x85/0x90 [llc]() device type not supported: 0 CPU: 1 PID: 6917 Comm: trinity-c1 Not tainted 3.13.0+ #95 0000000000000009 00000000007e257d ffff88009232fbe8 ffffffffac737325 ffff88009232fc30 ffff88009232fc20 ffffffffac06d28d ffff88020e07f180 ffff88009232fec0 00000000000000c8 0000000000000000 ffff88009232fe70 Call Trace: [] dump_stack+0x4e/0x7a [] warn_slowpath_common+0x7d/0xa0 [] warn_slowpath_fmt+0x5c/0x80 [] llc_mac_hdr_init+0x85/0x90 [llc] [] llc_build_and_send_ui_pkt+0x79/0x90 [llc] [] llc_ui_sendmsg+0x23a/0x400 [llc2] [] sock_sendmsg+0x9c/0xe0 [] ? might_fault+0x47/0x50 [] SYSC_sendto+0x121/0x1c0 [] ? syscall_trace_enter+0x207/0x270 [] SyS_sendto+0xe/0x10 [] tracesys+0xdd/0xe2 Until 2009, this was a printk, when it was changed in bf9ae5386bc: "llc: use dev_hard_header". Let userland figure out what -EINVAL means by itself. Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/llc/llc_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c index 2dae8a5df23f..94425e421213 100644 --- a/net/llc/llc_output.c +++ b/net/llc/llc_output.c @@ -43,7 +43,7 @@ int llc_mac_hdr_init(struct sk_buff *skb, rc = 0; break; default: - WARN(1, "device type not supported: %d\n", skb->dev->type); + break; } return rc; } From 7fceb4de75f993a598d27af835e87b19b8be514e Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Wed, 29 Jan 2014 01:05:28 +0900 Subject: [PATCH 26/32] net: Fix warning on make htmldocs caused by skbuff.c This patch fixed following Warning while executing "make htmldocs". Warning(/net/core/skbuff.c:2164): No description found for parameter 'from' Warning(/net/core/skbuff.c:2164): Excess function parameter 'source' description in 'skb_zerocopy' Replace "@source" with "@from" fixed the warning. Signed-off-by: Masanari Iida Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9ae6d11374d1..5976ef0846bd 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2121,7 +2121,7 @@ EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); /** * skb_zerocopy - Zero copy skb to skb * @to: destination buffer - * @source: source buffer + * @from: source buffer * @len: number of bytes to copy from source buffer * @hlen: size of linear headroom in destination buffer * From 33f9e6f57eb91cafcee4d8f185487bce6787957b Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Tue, 28 Jan 2014 17:28:51 +0200 Subject: [PATCH 27/32] bnx2x: Fix generic option settings When user tried to change generic options using "ethtool -s" command, while SFP module is plugged out or during module detection, the command would have failed with "Unsupported port type" message. The fix is to ignore the port option in case it's same as the current port configuration. Signed-off-by: Yaniv Rosner Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- .../ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 78 +++++++++---------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 92a467ff4104..38fc794c1655 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -358,49 +358,47 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) cfg_idx = bnx2x_get_link_cfg_idx(bp); old_multi_phy_config = bp->link_params.multi_phy_config; - switch (cmd->port) { - case PORT_TP: - if (bp->port.supported[cfg_idx] & SUPPORTED_TP) - break; /* no port change */ - - if (!(bp->port.supported[0] & SUPPORTED_TP || - bp->port.supported[1] & SUPPORTED_TP)) { + if (cmd->port != bnx2x_get_port_type(bp)) { + switch (cmd->port) { + case PORT_TP: + if (!(bp->port.supported[0] & SUPPORTED_TP || + bp->port.supported[1] & SUPPORTED_TP)) { + DP(BNX2X_MSG_ETHTOOL, + "Unsupported port type\n"); + return -EINVAL; + } + bp->link_params.multi_phy_config &= + ~PORT_HW_CFG_PHY_SELECTION_MASK; + if (bp->link_params.multi_phy_config & + PORT_HW_CFG_PHY_SWAPPED_ENABLED) + bp->link_params.multi_phy_config |= + PORT_HW_CFG_PHY_SELECTION_SECOND_PHY; + else + bp->link_params.multi_phy_config |= + PORT_HW_CFG_PHY_SELECTION_FIRST_PHY; + break; + case PORT_FIBRE: + case PORT_DA: + if (!(bp->port.supported[0] & SUPPORTED_FIBRE || + bp->port.supported[1] & SUPPORTED_FIBRE)) { + DP(BNX2X_MSG_ETHTOOL, + "Unsupported port type\n"); + return -EINVAL; + } + bp->link_params.multi_phy_config &= + ~PORT_HW_CFG_PHY_SELECTION_MASK; + if (bp->link_params.multi_phy_config & + PORT_HW_CFG_PHY_SWAPPED_ENABLED) + bp->link_params.multi_phy_config |= + PORT_HW_CFG_PHY_SELECTION_FIRST_PHY; + else + bp->link_params.multi_phy_config |= + PORT_HW_CFG_PHY_SELECTION_SECOND_PHY; + break; + default: DP(BNX2X_MSG_ETHTOOL, "Unsupported port type\n"); return -EINVAL; } - bp->link_params.multi_phy_config &= - ~PORT_HW_CFG_PHY_SELECTION_MASK; - if (bp->link_params.multi_phy_config & - PORT_HW_CFG_PHY_SWAPPED_ENABLED) - bp->link_params.multi_phy_config |= - PORT_HW_CFG_PHY_SELECTION_SECOND_PHY; - else - bp->link_params.multi_phy_config |= - PORT_HW_CFG_PHY_SELECTION_FIRST_PHY; - break; - case PORT_FIBRE: - case PORT_DA: - if (bp->port.supported[cfg_idx] & SUPPORTED_FIBRE) - break; /* no port change */ - - if (!(bp->port.supported[0] & SUPPORTED_FIBRE || - bp->port.supported[1] & SUPPORTED_FIBRE)) { - DP(BNX2X_MSG_ETHTOOL, "Unsupported port type\n"); - return -EINVAL; - } - bp->link_params.multi_phy_config &= - ~PORT_HW_CFG_PHY_SELECTION_MASK; - if (bp->link_params.multi_phy_config & - PORT_HW_CFG_PHY_SWAPPED_ENABLED) - bp->link_params.multi_phy_config |= - PORT_HW_CFG_PHY_SELECTION_FIRST_PHY; - else - bp->link_params.multi_phy_config |= - PORT_HW_CFG_PHY_SELECTION_SECOND_PHY; - break; - default: - DP(BNX2X_MSG_ETHTOOL, "Unsupported port type\n"); - return -EINVAL; } /* Save new config in case command complete successfully */ new_multi_phy_config = bp->link_params.multi_phy_config; From 731ff244236ad37dbb4a4b2adc7e2c104c01153f Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 28 Jan 2014 02:47:41 +0300 Subject: [PATCH 28/32] DT: net: davinci_emac: "ti, davinci-rmii-en" property is actually optional Though described as required, the "ti,davinci-rmii-en" property for the DaVinci EMAC binding seems actually optional, as the driver should happily work without it; the property is not specified either in the example device node or in the actual EMAC device node for DA850 device tree, only AM3517 one. While at it, document the property better... Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/davinci_emac.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt index ca0911a20e8b..49103e457d9d 100644 --- a/Documentation/devicetree/bindings/net/davinci_emac.txt +++ b/Documentation/devicetree/bindings/net/davinci_emac.txt @@ -10,7 +10,6 @@ Required properties: - ti,davinci-ctrl-mod-reg-offset: offset to control module register - ti,davinci-ctrl-ram-offset: offset to control module ram - ti,davinci-ctrl-ram-size: size of control module ram -- ti,davinci-rmii-en: use RMII - ti,davinci-no-bd-ram: has the emac controller BD RAM - interrupts: interrupt mapping for the davinci emac interrupts sources: 4 sources: Date: Tue, 28 Jan 2014 02:49:39 +0300 Subject: [PATCH 29/32] DT: net: davinci_emac: "ti, davinci-no-bd-ram" property is actually optional The "ti,davinci-no-bd-ram" property for the DaVinci EMAC binding simply can't be required one, as it's boolean (which means it's absent if false). While at it, document the property better... Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/davinci_emac.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt index 49103e457d9d..6e356d15154a 100644 --- a/Documentation/devicetree/bindings/net/davinci_emac.txt +++ b/Documentation/devicetree/bindings/net/davinci_emac.txt @@ -10,7 +10,6 @@ Required properties: - ti,davinci-ctrl-mod-reg-offset: offset to control module register - ti,davinci-ctrl-ram-offset: offset to control module ram - ti,davinci-ctrl-ram-size: size of control module ram -- ti,davinci-no-bd-ram: has the emac controller BD RAM - interrupts: interrupt mapping for the davinci emac interrupts sources: 4 sources: Date: Wed, 29 Jan 2014 16:43:31 +0900 Subject: [PATCH 30/32] tun: add device name(iff) field to proc fdinfo entry A file descriptor opened for /dev/net/tun and a tun device are connected with ioctl. Though understanding the connection is important for trouble shooting, no way is given to a user to know the connected device for a given file descriptor at userland. This patch adds a new fdinfo field for the device name connected to a file descriptor opened for /dev/net/tun. Here is an example of the field: # lsof | grep tun qemu-syst 4565 qemu 25u CHR 10,200 0t138 12921 /dev/net/tun ... # cat /proc/4565/fdinfo/25 pos: 138 flags: 0104002 iff: vnet0 # ip link show dev vnet0 8: vnet0: mtu 1500 ... changelog: v2: indent iff just like the other fdinfo fields are. v3: remove unused variable. Both are suggested by David Miller . Signed-off-by: Masatake YAMATO Signed-off-by: David S. Miller --- drivers/net/tun.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index bcf01af4b879..44c4db8450f0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -69,6 +69,7 @@ #include #include #include +#include #include @@ -2228,6 +2229,27 @@ static int tun_chr_close(struct inode *inode, struct file *file) return 0; } +#ifdef CONFIG_PROC_FS +static int tun_chr_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct tun_struct *tun; + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + rtnl_lock(); + tun = tun_get(f); + if (tun) + tun_get_iff(current->nsproxy->net_ns, tun, &ifr); + rtnl_unlock(); + + if (tun) + tun_put(tun); + + return seq_printf(m, "iff:\t%s\n", ifr.ifr_name); +} +#endif + static const struct file_operations tun_fops = { .owner = THIS_MODULE, .llseek = no_llseek, @@ -2242,7 +2264,10 @@ static const struct file_operations tun_fops = { #endif .open = tun_chr_open, .release = tun_chr_close, - .fasync = tun_chr_fasync + .fasync = tun_chr_fasync, +#ifdef CONFIG_PROC_FS + .show_fdinfo = tun_chr_show_fdinfo, +#endif }; static struct miscdevice tun_miscdev = { From 6fde8f037e604e05df1529e4689041715d6d55d2 Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Tue, 28 Jan 2014 11:48:53 +0800 Subject: [PATCH 31/32] bonding: fix locking in bond_loadbalance_arp_mon() The commit 1d3ee88ae0d605629bf369 (bonding: add netlink attributes to slave link dev) has add rtmsg_ifinfo() in bond_set_active_slave() and bond_set_backup_slave(), so the two function need to called in RTNL lock, but bond_loadbalance_arp_mon() only calling these functions in RCU, warning message will occurs. fix this by add a new function bond_slave_state_change(), which will reset the slave's state after slave link check, so remove the bond_set_xxx_slave() from the cycle and only record the slave_state_changed, this will call the new function to set all slaves to new state in RTNL later. Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 29 +++++++++++++++++------------ drivers/net/bonding/bonding.h | 13 +++++++++++++ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index dd75615d85f2..4c08018d7333 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2346,7 +2346,7 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) arp_work.work); struct slave *slave, *oldcurrent; struct list_head *iter; - int do_failover = 0; + int do_failover = 0, slave_state_changed = 0; if (!bond_has_slaves(bond)) goto re_arm; @@ -2370,7 +2370,7 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) bond_time_in_interval(bond, slave->dev->last_rx, 1)) { slave->link = BOND_LINK_UP; - bond_set_active_slave(slave); + slave_state_changed = 1; /* primary_slave has no meaning in round-robin * mode. the window of a slave being up and @@ -2399,7 +2399,7 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) !bond_time_in_interval(bond, slave->dev->last_rx, 2)) { slave->link = BOND_LINK_DOWN; - bond_set_backup_slave(slave); + slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; @@ -2426,19 +2426,24 @@ static void bond_loadbalance_arp_mon(struct work_struct *work) rcu_read_unlock(); - if (do_failover) { - /* the bond_select_active_slave must hold RTNL - * and curr_slave_lock for write. - */ + if (do_failover || slave_state_changed) { if (!rtnl_trylock()) goto re_arm; - block_netpoll_tx(); - write_lock_bh(&bond->curr_slave_lock); - bond_select_active_slave(bond); + if (slave_state_changed) { + bond_slave_state_change(bond); + } else if (do_failover) { + /* the bond_select_active_slave must hold RTNL + * and curr_slave_lock for write. + */ + block_netpoll_tx(); + write_lock_bh(&bond->curr_slave_lock); - write_unlock_bh(&bond->curr_slave_lock); - unblock_netpoll_tx(); + bond_select_active_slave(bond); + + write_unlock_bh(&bond->curr_slave_lock); + unblock_netpoll_tx(); + } rtnl_unlock(); } diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 1a9062f4e0d6..86ccfb9f71cc 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -303,6 +303,19 @@ static inline void bond_set_backup_slave(struct slave *slave) } } +static inline void bond_slave_state_change(struct bonding *bond) +{ + struct list_head *iter; + struct slave *tmp; + + bond_for_each_slave(bond, tmp, iter) { + if (tmp->link == BOND_LINK_UP) + bond_set_active_slave(tmp); + else if (tmp->link == BOND_LINK_DOWN) + bond_set_backup_slave(tmp); + } +} + static inline int bond_slave_state(struct slave *slave) { return slave->backup; From c044dc2132d19d8c643cdd340f21afcec177c046 Mon Sep 17 00:00:00 2001 From: Eugene Crosser Date: Wed, 29 Jan 2014 09:23:48 +0100 Subject: [PATCH 32/32] qeth: fix build of s390 allmodconfig commit 949efd1c "qeth: bridgeport support - basic control" broke s390 allmodconfig. This patch fixes this by eliminating one of the cross-module calls, and by making two other calls via function pointers in the qeth_discipline structure. Signed-off-by: Eugene Crosser Signed-off-by: Frank Blaschka Reported-by: Paul Gortmaker Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 5 ++-- drivers/s390/net/qeth_core_main.c | 18 +++++--------- drivers/s390/net/qeth_l2_main.c | 41 ++++++++++++++++++++++++++----- drivers/s390/net/qeth_l3_main.c | 8 ++++++ 4 files changed, 51 insertions(+), 21 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index ac0bdded060f..a0de045eb227 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -738,6 +738,8 @@ struct qeth_discipline { int (*freeze)(struct ccwgroup_device *); int (*thaw) (struct ccwgroup_device *); int (*restore)(struct ccwgroup_device *); + int (*control_event_handler)(struct qeth_card *card, + struct qeth_ipa_cmd *cmd); }; struct qeth_vlan_vid { @@ -948,13 +950,10 @@ int qeth_query_card_info(struct qeth_card *card, int qeth_send_control_data(struct qeth_card *, int, struct qeth_cmd_buffer *, int (*reply_cb)(struct qeth_card *, struct qeth_reply*, unsigned long), void *reply_param); -void qeth_bridge_state_change(struct qeth_card *card, struct qeth_ipa_cmd *cmd); -void qeth_bridgeport_query_support(struct qeth_card *card); int qeth_bridgeport_query_ports(struct qeth_card *card, enum qeth_sbp_roles *role, enum qeth_sbp_states *state); int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role); int qeth_bridgeport_an_set(struct qeth_card *card, int enable); -void qeth_bridge_host_event(struct qeth_card *card, struct qeth_ipa_cmd *cmd); int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); int qeth_get_elements_no(struct qeth_card *, struct sk_buff *, int); int qeth_get_elements_for_frags(struct sk_buff *); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index c05dacbf4e23..c3a83df07894 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -69,6 +69,7 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue, static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int); struct workqueue_struct *qeth_wq; +EXPORT_SYMBOL_GPL(qeth_wq); static void qeth_close_dev_handler(struct work_struct *work) { @@ -616,15 +617,12 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card, qeth_schedule_recovery(card); return NULL; case IPA_CMD_SETBRIDGEPORT: - if (cmd->data.sbp.hdr.command_code == - IPA_SBP_BRIDGE_PORT_STATE_CHANGE) { - qeth_bridge_state_change(card, cmd); - return NULL; - } else - return cmd; case IPA_CMD_ADDRESS_CHANGE_NOTIF: - qeth_bridge_host_event(card, cmd); - return NULL; + if (card->discipline->control_event_handler + (card, cmd)) + return cmd; + else + return NULL; case IPA_CMD_MODCCID: return cmd; case IPA_CMD_REGISTER_LOCAL_ADDR: @@ -4973,10 +4971,6 @@ retriable: qeth_query_setadapterparms(card); if (qeth_adp_supported(card, IPA_SETADP_SET_DIAG_ASSIST)) qeth_query_setdiagass(card); - qeth_bridgeport_query_support(card); - if (card->options.sbp.supported_funcs) - dev_info(&card->gdev->dev, - "The device represents a HiperSockets Bridge Capable Port\n"); return 0; out: dev_warn(&card->gdev->dev, "The qeth device driver failed to recover " diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 914d2c121fd8..0710550093ce 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -33,6 +33,11 @@ static int qeth_l2_send_setdelmac(struct qeth_card *, __u8 *, unsigned long)); static void qeth_l2_set_multicast_list(struct net_device *); static int qeth_l2_recover(void *); +static void qeth_bridgeport_query_support(struct qeth_card *card); +static void qeth_bridge_state_change(struct qeth_card *card, + struct qeth_ipa_cmd *cmd); +static void qeth_bridge_host_event(struct qeth_card *card, + struct qeth_ipa_cmd *cmd); static int qeth_l2_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { @@ -989,6 +994,10 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) rc = -ENODEV; goto out_remove; } + qeth_bridgeport_query_support(card); + if (card->options.sbp.supported_funcs) + dev_info(&card->gdev->dev, + "The device represents a HiperSockets Bridge Capable Port\n"); qeth_trace_features(card); if (!card->dev && qeth_l2_setup_netdev(card)) { @@ -1233,6 +1242,26 @@ out: return rc; } +/* Returns zero if the command is successfully "consumed" */ +static int qeth_l2_control_event(struct qeth_card *card, + struct qeth_ipa_cmd *cmd) +{ + switch (cmd->hdr.command) { + case IPA_CMD_SETBRIDGEPORT: + if (cmd->data.sbp.hdr.command_code == + IPA_SBP_BRIDGE_PORT_STATE_CHANGE) { + qeth_bridge_state_change(card, cmd); + return 0; + } else + return 1; + case IPA_CMD_ADDRESS_CHANGE_NOTIF: + qeth_bridge_host_event(card, cmd); + return 0; + default: + return 1; + } +} + struct qeth_discipline qeth_l2_discipline = { .start_poll = qeth_qdio_start_poll, .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, @@ -1246,6 +1275,7 @@ struct qeth_discipline qeth_l2_discipline = { .freeze = qeth_l2_pm_suspend, .thaw = qeth_l2_pm_resume, .restore = qeth_l2_pm_resume, + .control_event_handler = qeth_l2_control_event, }; EXPORT_SYMBOL_GPL(qeth_l2_discipline); @@ -1463,7 +1493,8 @@ static void qeth_bridge_state_change_worker(struct work_struct *work) kfree(data); } -void qeth_bridge_state_change(struct qeth_card *card, struct qeth_ipa_cmd *cmd) +static void qeth_bridge_state_change(struct qeth_card *card, + struct qeth_ipa_cmd *cmd) { struct qeth_sbp_state_change *qports = &cmd->data.sbp.data.state_change; @@ -1488,7 +1519,6 @@ void qeth_bridge_state_change(struct qeth_card *card, struct qeth_ipa_cmd *cmd) sizeof(struct qeth_sbp_state_change) + extrasize); queue_work(qeth_wq, &data->worker); } -EXPORT_SYMBOL(qeth_bridge_state_change); struct qeth_bridge_host_data { struct work_struct worker; @@ -1528,7 +1558,8 @@ static void qeth_bridge_host_event_worker(struct work_struct *work) kfree(data); } -void qeth_bridge_host_event(struct qeth_card *card, struct qeth_ipa_cmd *cmd) +static void qeth_bridge_host_event(struct qeth_card *card, + struct qeth_ipa_cmd *cmd) { struct qeth_ipacmd_addr_change *hostevs = &cmd->data.addrchange; @@ -1560,7 +1591,6 @@ void qeth_bridge_host_event(struct qeth_card *card, struct qeth_ipa_cmd *cmd) sizeof(struct qeth_ipacmd_addr_change) + extrasize); queue_work(qeth_wq, &data->worker); } -EXPORT_SYMBOL(qeth_bridge_host_event); /* SETBRIDGEPORT support; sending commands */ @@ -1683,7 +1713,7 @@ static int qeth_bridgeport_query_support_cb(struct qeth_card *card, * Sets bitmask of supported setbridgeport subfunctions in the qeth_card * strucutre: card->options.sbp.supported_funcs. */ -void qeth_bridgeport_query_support(struct qeth_card *card) +static void qeth_bridgeport_query_support(struct qeth_card *card) { struct qeth_cmd_buffer *iob; struct qeth_ipa_cmd *cmd; @@ -1709,7 +1739,6 @@ void qeth_bridgeport_query_support(struct qeth_card *card) } card->options.sbp.supported_funcs = cbctl.data.supported; } -EXPORT_SYMBOL_GPL(qeth_bridgeport_query_support); static int qeth_bridgeport_query_ports_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c1b0b2761f8d..0f430424c3b8 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3593,6 +3593,13 @@ out: return rc; } +/* Returns zero if the command is successfully "consumed" */ +static int qeth_l3_control_event(struct qeth_card *card, + struct qeth_ipa_cmd *cmd) +{ + return 1; +} + struct qeth_discipline qeth_l3_discipline = { .start_poll = qeth_qdio_start_poll, .input_handler = (qdio_handler_t *) qeth_qdio_input_handler, @@ -3606,6 +3613,7 @@ struct qeth_discipline qeth_l3_discipline = { .freeze = qeth_l3_pm_suspend, .thaw = qeth_l3_pm_resume, .restore = qeth_l3_pm_resume, + .control_event_handler = qeth_l3_control_event, }; EXPORT_SYMBOL_GPL(qeth_l3_discipline);