From 9e65dc371b5c8d7476c81353137efc13cc1bdabd Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 28 Jan 2014 14:52:47 +0200 Subject: [PATCH 01/20] IB/mlx5: Fix RC transport send queue overhead computation Fix the RC QPs send queue overhead computation to take into account two additional segments in the WQE which are needed for registration operations. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ae37fb9bf262..492dc330e907 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -216,7 +216,9 @@ static int sq_overhead(enum ib_qp_type qp_type) case IB_QPT_UC: size += sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg); + sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg); break; case IB_QPT_UD: From 78c0f98cc9dd46824fa66f35f14ea24ba733d145 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 30 Jan 2014 13:49:48 +0200 Subject: [PATCH 02/20] IB/mlx5: Fix binary compatibility with libmlx5 Commit c1be5232d21d ("Fix micro UAR allocator") broke binary compatibility between libmlx5 and mlx5_ib since it defines a different value to the number of micro UARs per page, leading to wrong calculation in libmlx5. This patch defines struct mlx5_ib_alloc_ucontext_req_v2 as an extension to struct mlx5_ib_alloc_ucontext_req. The extended size is determined in mlx5_ib_alloc_ucontext() and in case of old library we use uuarn 0 which works fine -- this is acheived due to create_user_qp() falling back from high to medium then to low class where low class will return 0. For new libraries we use the more sophisticated allocation algorithm. Signed-off-by: Eli Cohen Reviewed-by: Yann Droneaud Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 19 +++++++++++++++++-- drivers/infiniband/hw/mlx5/qp.c | 10 ++++++++-- drivers/infiniband/hw/mlx5/user.h | 7 +++++++ include/linux/mlx5/driver.h | 1 + 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 9660d093f8cf..f4ef4a24d410 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -536,24 +536,38 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_alloc_ucontext_req req; + struct mlx5_ib_alloc_ucontext_req_v2 req; struct mlx5_ib_alloc_ucontext_resp resp; struct mlx5_ib_ucontext *context; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; int gross_uuars; int num_uars; + int ver; int uuarn; int err; int i; + int reqlen; if (!dev->ib_active) return ERR_PTR(-EAGAIN); - err = ib_copy_from_udata(&req, udata, sizeof(req)); + memset(&req, 0, sizeof(req)); + reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); + if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) + ver = 0; + else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2)) + ver = 2; + else + return ERR_PTR(-EINVAL); + + err = ib_copy_from_udata(&req, udata, reqlen); if (err) return ERR_PTR(err); + if (req.flags || req.reserved) + return ERR_PTR(-EINVAL); + if (req.total_num_uuars > MLX5_MAX_UUARS) return ERR_PTR(-ENOMEM); @@ -626,6 +640,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (err) goto out_uars; + uuari->ver = ver; uuari->num_low_latency_uuars = req.num_low_latency_uuars; uuari->uars = uars; uuari->num_uars = num_uars; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 492dc330e907..091576a777e9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -430,11 +430,17 @@ static int alloc_uuar(struct mlx5_uuar_info *uuari, break; case MLX5_IB_LATENCY_CLASS_MEDIUM: - uuarn = alloc_med_class_uuar(uuari); + if (uuari->ver < 2) + uuarn = -ENOMEM; + else + uuarn = alloc_med_class_uuar(uuari); break; case MLX5_IB_LATENCY_CLASS_HIGH: - uuarn = alloc_high_class_uuar(uuari); + if (uuari->ver < 2) + uuarn = -ENOMEM; + else + uuarn = alloc_high_class_uuar(uuari); break; case MLX5_IB_LATENCY_CLASS_FAST_PATH: diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index 32a2a5dfc523..0f4f8e42a17f 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -62,6 +62,13 @@ struct mlx5_ib_alloc_ucontext_req { __u32 num_low_latency_uuars; }; +struct mlx5_ib_alloc_ucontext_req_v2 { + __u32 total_num_uuars; + __u32 num_low_latency_uuars; + __u32 flags; + __u32 reserved; +}; + struct mlx5_ib_alloc_ucontext_resp { __u32 qp_tab_size; __u32 bf_reg_size; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 554548cd3dd4..32cb18c399c2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -227,6 +227,7 @@ struct mlx5_uuar_info { * protect uuar allocation data structs */ struct mutex lock; + u32 ver; }; struct mlx5_bf { From 1a4c3a3dc5fdeef2a7bdf4ac7d81df58c3c0a51e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 6 Feb 2014 17:41:25 +0200 Subject: [PATCH 03/20] IB/mlx5: Don't set "block multicast loopback" capability Currently Connect-IB does not support blocking multicast loopback, so don't set IB_DEVICE_BLOCK_MULTICAST_LOOPBACK in the device caps. Reported by: Or Gerlitz Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/main.c | 3 +-- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f4ef4a24d410..aa03e732b6a8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -261,8 +261,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + IB_DEVICE_RC_RNR_NAK_GEN; flags = dev->mdev.caps.flags; if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 091576a777e9..7dfe8a1c84cf 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -665,8 +665,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int err; uuari = &dev->mdev.priv.uuari; - if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) - qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; + if (init_attr->create_flags) + return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) lc = MLX5_IB_LATENCY_CLASS_FAST_PATH; From d07875bd0d1517185534b5f9eef469426ba42cb9 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 29 Dec 2013 23:47:37 +0100 Subject: [PATCH 04/20] RDMA/nes: Fix error return code Set the return variable to an error code as done elsewhere in the function. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 429141078eec..353c7b05a90a 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -675,8 +675,11 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status); /* Initialize network devices */ - if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL) + netdev = nes_netdev_init(nesdev, mmio_regs); + if (netdev == NULL) { + ret = -ENOMEM; goto bail7; + } /* Register network device */ ret = register_netdev(netdev); From ab576627c8f97c08297d81537be17df161171923 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 29 Dec 2013 23:47:29 +0100 Subject: [PATCH 05/20] RDMA/amso1100: Fix error return code Set the return variable to an error code as done elsewhere in the function. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2.c | 4 +++- drivers/infiniband/hw/amso1100/c2_rnic.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index d53cf519f42a..00400c352c1a 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -1082,6 +1082,7 @@ static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) /* Initialize network device */ if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) { + ret = -ENOMEM; iounmap(mmio_regs); goto bail4; } @@ -1151,7 +1152,8 @@ static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) goto bail10; } - if (c2_register_device(c2dev)) + ret = c2_register_device(c2dev); + if (ret) goto bail10; return 0; diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c index b7c986990053..d2a6d961344b 100644 --- a/drivers/infiniband/hw/amso1100/c2_rnic.c +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -576,7 +576,8 @@ int c2_rnic_init(struct c2_dev *c2dev) goto bail4; /* Initialize cached the adapter limits */ - if (c2_rnic_query(c2dev, &c2dev->props)) + err = c2_rnic_query(c2dev, &c2dev->props); + if (err) goto bail5; /* Initialize the PD pool */ From 4196670be786d529ab7f6c18f5077141ce1b787e Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Sun, 2 Feb 2014 17:06:47 +0200 Subject: [PATCH 06/20] IB/mlx4: Don't allocate range of steerable UD QPs for Ethernet-only device When the device has only Ethernet ports, don't try to allocate range of steerable UD QPs since they aren't needed. This fixes an issue where mlx4 VFs tried to allocate a range of UD steerable QPs, but failed to do so. Fixes: c1c98501121e ("IB/mlx4: Add support for steerable IB UD QPs") Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c2702f549f10..64ca4087fb52 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1810,6 +1810,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) int i, j; int err; struct mlx4_ib_iboe *iboe; + int ib_num_ports = 0; pr_info_once("%s", mlx4_ib_version); @@ -1985,10 +1986,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->counters[i] = -1; } + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + ib_num_ports++; + spin_lock_init(&ibdev->sm_lock); mutex_init(&ibdev->cap_mask_mutex); - if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { + if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED && + ib_num_ports) { ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, MLX4_IB_UC_STEER_QPN_ALIGN, From acc4fccf4eff5b29e545995b75de77e60ea44aae Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 5 Feb 2014 15:12:58 +0200 Subject: [PATCH 07/20] IB/mlx4: Make sure GID index 0 is always occupied Make sure that for Ethernet ports, the port GID table index 0 is always occupied with a default GID of the relevant IPv6 link-local adderss. This provides better user experience for legacy applications that don't use the RDMA CM and were working on index 0 prior to the IP addressing change. Also, as GIDs are generated from IP addresses of the network devices that are associated with the port, it's basically possible that the GID table will be empty if no IP address was assigned. This doesn't comply with the IB spec section 4.1.1 "GID usage and properties". Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 92 +++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 64ca4087fb52..b99d0ecc6a89 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1357,6 +1357,21 @@ static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_board_id }; +static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, + struct net_device *dev) +{ + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + if (vlan_id < 0x1000) { + eui[3] = vlan_id >> 8; + eui[4] = vlan_id & 0xff; + } else { + eui[3] = 0xff; + eui[4] = 0xfe; + } + eui[0] ^= 2; +} + static void update_gids_task(struct work_struct *work) { struct update_gid_work *gw = container_of(work, struct update_gid_work, work); @@ -1425,7 +1440,8 @@ free: } static int update_gid_table(struct mlx4_ib_dev *dev, int port, - union ib_gid *gid, int clear) + union ib_gid *gid, int clear, + int default_gid) { struct update_gid_work *work; int i; @@ -1434,26 +1450,31 @@ static int update_gid_table(struct mlx4_ib_dev *dev, int port, int found = -1; int max_gids; - max_gids = dev->dev->caps.gid_table_len[port]; - for (i = 0; i < max_gids; ++i) { - if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid, - sizeof(*gid))) - found = i; - - if (clear) { - if (found >= 0) { - need_update = 1; - dev->iboe.gid_table[port - 1][found] = zgid; - break; - } - } else { - if (found >= 0) - break; - - if (free < 0 && - !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, + if (default_gid) { + free = 0; + } else { + max_gids = dev->dev->caps.gid_table_len[port]; + for (i = 1; i < max_gids; ++i) { + if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid, sizeof(*gid))) - free = i; + found = i; + + if (clear) { + if (found >= 0) { + need_update = 1; + dev->iboe.gid_table[port - 1][found] = + zgid; + break; + } + } else { + if (found >= 0) + break; + + if (free < 0 && + !memcmp(&dev->iboe.gid_table[port - 1][i], + &zgid, sizeof(*gid))) + free = i; + } } } @@ -1478,6 +1499,13 @@ static int update_gid_table(struct mlx4_ib_dev *dev, int port, return 0; } +static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid) +{ + gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev); +} + + static int reset_gid_table(struct mlx4_ib_dev *dev) { struct update_gid_work *work; @@ -1502,6 +1530,12 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? rdma_vlan_dev_real_dev(event_netdev) : event_netdev; + union ib_gid default_gid; + + mlx4_make_default_gid(real_dev, &default_gid); + + if (!memcmp(gid, &default_gid, sizeof(*gid))) + return 0; if (event != NETDEV_DOWN && event != NETDEV_UP) return 0; @@ -1520,7 +1554,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, (!netif_is_bond_master(real_dev) && (real_dev == iboe->netdevs[port - 1]))) update_gid_table(ibdev, port, gid, - event == NETDEV_DOWN); + event == NETDEV_DOWN, 0); spin_unlock(&iboe->lock); return 0; @@ -1607,7 +1641,7 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev, /*ifa->ifa_address;*/ ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); - update_gid_table(ibdev, port, &gid, 0); + update_gid_table(ibdev, port, &gid, 0, 0); } endfor_ifa(in_dev); in_dev_put(in_dev); @@ -1619,7 +1653,7 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev, read_lock_bh(&in6_dev->lock); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { pgid = (union ib_gid *)&ifp->addr; - update_gid_table(ibdev, port, pgid, 0); + update_gid_table(ibdev, port, pgid, 0, 0); } read_unlock_bh(&in6_dev->lock); in6_dev_put(in6_dev); @@ -1627,6 +1661,14 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev, #endif } +static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev, + struct net_device *dev, u8 port) +{ + union ib_gid gid; + mlx4_make_default_gid(dev, &gid); + update_gid_table(ibdev, port, &gid, 0, 1); +} + static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) { struct net_device *dev; @@ -1660,7 +1702,9 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) struct net_device *curr_master; iboe->netdevs[port - 1] = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); - + if (iboe->netdevs[port - 1]) + mlx4_ib_set_default_gid(ibdev, + iboe->netdevs[port - 1], port); if (iboe->netdevs[port - 1] && netif_is_bond_slave(iboe->netdevs[port - 1])) { rtnl_lock(); From 4ce5a5744a2f5479e58c6788cbe3987b8071b62e Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 5 Feb 2014 15:12:59 +0200 Subject: [PATCH 08/20] IB/mlx4: Move rtnl locking to the right place On the one hand, the invocation of netdev_master_upper_dev_get() within mlx4_ib_scan_netdevs() must be done with rtnl lock held. On the other hand, it's wrong to call rtnl_lock() from within this function since it's also called by our netdev notifier callback. Therefore move the locking to mlx4_ib_add() so that both cases are covered. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b99d0ecc6a89..93f492f62997 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1707,10 +1707,8 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) iboe->netdevs[port - 1], port); if (iboe->netdevs[port - 1] && netif_is_bond_slave(iboe->netdevs[port - 1])) { - rtnl_lock(); iboe->masters[port - 1] = netdev_master_upper_dev_get( iboe->netdevs[port - 1]); - rtnl_unlock(); } curr_master = iboe->masters[port - 1]; @@ -2100,7 +2098,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) } } #endif + rtnl_lock(); mlx4_ib_scan_netdevs(ibdev); + rtnl_unlock(); mlx4_ib_init_gid_table(ibdev); } From ddf8bd349115c2bc85a62e3d94018c9976ac72f7 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 5 Feb 2014 15:13:00 +0200 Subject: [PATCH 09/20] IB/mlx4: Do IBoE locking earlier when initializing the GID table Updating the GID table under IBoE requires read/write from/to shared data structures. These data structures are protected with the device iboe lock. The flows that modify the GID table start from 1. Initializing the GID table 2. NETDEV events 3. INET or INET6 events This patch makes sure that the flow of initializing the GID table is consistent with the other two flows w.r.t on what step the lock is taken. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 93f492f62997..3bafd0bebd4c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1570,7 +1570,6 @@ static u8 mlx4_ib_get_dev_port(struct net_device *dev, rdma_vlan_dev_real_dev(dev) : dev; iboe = &ibdev->iboe; - spin_lock(&iboe->lock); for (port = 1; port <= MLX4_MAX_PORTS; ++port) if ((netif_is_bond_master(real_dev) && @@ -1579,8 +1578,6 @@ static u8 mlx4_ib_get_dev_port(struct net_device *dev, (real_dev == iboe->netdevs[port - 1]))) break; - spin_unlock(&iboe->lock); - if ((port == 0) || (port > MLX4_MAX_PORTS)) return 0; else @@ -1672,11 +1669,13 @@ static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev, static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) { struct net_device *dev; + struct mlx4_ib_iboe *iboe = &ibdev->iboe; if (reset_gid_table(ibdev)) return -1; read_lock(&dev_base_lock); + spin_lock(&iboe->lock); for_each_netdev(&init_net, dev) { u8 port = mlx4_ib_get_dev_port(dev, ibdev); @@ -1684,6 +1683,7 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) mlx4_ib_get_dev_addr(dev, ibdev, port); } + spin_unlock(&iboe->lock); read_unlock(&dev_base_lock); return 0; From 5071456fe244e409adcda7226e5f0b5d1b879fd3 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 5 Feb 2014 15:13:01 +0200 Subject: [PATCH 10/20] IB/mlx4: Do IBoE GID table resets per-port The IBoE code used to reset the GID table did it for all Ethernet ports of the device. Since the whole architecture of generating GIDs and responding to events is port-based, this is inefficient and can lead to wrong content in the GID table. Change the reset flow to be per-port. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 35 ++++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3bafd0bebd4c..8e10630561c1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1408,7 +1408,6 @@ static void reset_gids_task(struct work_struct *work) struct mlx4_cmd_mailbox *mailbox; union ib_gid *gids; int err; - int i; struct mlx4_dev *dev = gw->dev->dev; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -1420,18 +1419,16 @@ static void reset_gids_task(struct work_struct *work) gids = mailbox->buf; memcpy(gids, gw->gids, sizeof(gw->gids)); - for (i = 1; i < gw->dev->num_ports + 1; i++) { - if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, i) == - IB_LINK_LAYER_ETHERNET) { - err = mlx4_cmd(dev, mailbox->dma, - MLX4_SET_PORT_GID_TABLE << 8 | i, - 1, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_WRAPPED); - if (err) - pr_warn(KERN_WARNING - "set port %d command failed\n", i); - } + if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) == + IB_LINK_LAYER_ETHERNET) { + err = mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_GID_TABLE << 8 | gw->port, + 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + pr_warn(KERN_WARNING + "set port %d command failed\n", gw->port); } mlx4_free_cmd_mailbox(dev, mailbox); @@ -1506,7 +1503,7 @@ static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid) } -static int reset_gid_table(struct mlx4_ib_dev *dev) +static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port) { struct update_gid_work *work; @@ -1514,10 +1511,12 @@ static int reset_gid_table(struct mlx4_ib_dev *dev) work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return -ENOMEM; - memset(dev->iboe.gid_table, 0, sizeof(dev->iboe.gid_table)); + + memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids)); memset(work->gids, 0, sizeof(work->gids)); INIT_WORK(&work->work, reset_gids_task); work->dev = dev; + work->port = port; queue_work(wq, &work->work); return 0; } @@ -1670,9 +1669,11 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) { struct net_device *dev; struct mlx4_ib_iboe *iboe = &ibdev->iboe; + int i; - if (reset_gid_table(ibdev)) - return -1; + for (i = 1; i <= ibdev->num_ports; ++i) + if (reset_gid_table(ibdev, i)) + return -1; read_lock(&dev_base_lock); spin_lock(&iboe->lock); From ad4885d279b63c65347220236d07669a2f59634b Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 5 Feb 2014 15:13:02 +0200 Subject: [PATCH 11/20] IB/mlx4: Build the port IBoE GID table properly under bonding When scanning netdevices we need to check a few more conditions and cases to build the IBoE GID table properly. For example, under bonding we must make sure that when a port is down, the bond IP address isn't programmed as a GID, since doing so will cause failure with IB core flows that selects ports by GID. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 41 ++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8e10630561c1..fdb5c7c9e127 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1507,7 +1507,6 @@ static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port) { struct update_gid_work *work; - work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return -ENOMEM; @@ -1699,25 +1698,57 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) spin_lock(&iboe->lock); mlx4_foreach_ib_transport_port(port, ibdev->dev) { + enum ib_port_state port_state = IB_PORT_NOP; struct net_device *old_master = iboe->masters[port - 1]; + struct net_device *curr_netdev; struct net_device *curr_master; + iboe->netdevs[port - 1] = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); if (iboe->netdevs[port - 1]) mlx4_ib_set_default_gid(ibdev, iboe->netdevs[port - 1], port); + curr_netdev = iboe->netdevs[port - 1]; + if (iboe->netdevs[port - 1] && netif_is_bond_slave(iboe->netdevs[port - 1])) { iboe->masters[port - 1] = netdev_master_upper_dev_get( iboe->netdevs[port - 1]); + } else { + iboe->masters[port - 1] = NULL; } curr_master = iboe->masters[port - 1]; + if (curr_netdev) { + port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); + } else { + reset_gid_table(ibdev, port); + } + /* if using bonding/team and a slave port is down, we don't the bond IP + * based gids in the table since flows that select port by gid may get + * the down port. + */ + if (curr_master && (port_state == IB_PORT_DOWN)) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); + } /* if bonding is used it is possible that we add it to masters - only after IP address is assigned to the net bonding - interface */ - if (curr_master && (old_master != curr_master)) + * only after IP address is assigned to the net bonding + * interface. + */ + if (curr_master && (old_master != curr_master)) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); mlx4_ib_get_dev_addr(curr_master, ibdev, port); + } + + if (!curr_master && (old_master != curr_master)) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); + mlx4_ib_get_dev_addr(curr_netdev, ibdev, port); + } } spin_unlock(&iboe->lock); @@ -2099,6 +2130,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) } } #endif + for (i = 1 ; i <= ibdev->num_ports ; ++i) + reset_gid_table(ibdev, i); rtnl_lock(); mlx4_ib_scan_netdevs(ibdev); rtnl_unlock(); From 0f0132001fd239bb67c1f68436b95cc79de89736 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Thu, 6 Feb 2014 16:00:16 +0530 Subject: [PATCH 12/20] RDMA/cxgb4: Add missing neigh_release in LE-Workaround path Signed-off-by: Kumar Sanghvi Signed-off-by: Hariprasad Shenai Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 45126879ad28..d286bdebe2ab 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3352,6 +3352,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) goto free_dst; } + neigh_release(neigh); step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; window = (__force u16) htons((__force u16)tcph->window); From 2f75e12c4457a9b3d042c0a0d748fa198dc2ffaf Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 12 Feb 2014 11:54:15 -0500 Subject: [PATCH 13/20] IB/qib: Add missing serdes init sequence Research has shown that commit a77fcf895046 ("IB/qib: Use a single txselect module parameter for serdes tuning") missed a key serdes init sequence. This patch add that sequence. Cc: Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_iba7322.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 5bfc02f450e6..d1bd21319d7d 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2395,6 +2395,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); qib_write_kreg(dd, kr_scratch, 0ULL); + /* ensure previous Tx parameters are not still forced */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + if (qib_compat_ddr_negotiate) { ppd->cpspec->ibdeltainprog = 1; ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd, From 6ecde51dd7894ffe2f959cca1fea3ea2b9ee2394 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 13 Feb 2014 20:45:17 -0800 Subject: [PATCH 14/20] mlx5: Add include of because of kzalloc()/kfree() use On some architectures (for example, arm), we don't end up indirectly pulling in the declaration of kzalloc() and kfree(), and so building anything that includes breaks. Fix this by adding an explicit include to get the declaration. Reported-by: kbuild test robot Signed-off-by: Roland Dreier --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 32cb18c399c2..130bc8d77fa5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -38,8 +38,10 @@ #include #include #include +#include #include #include + #include #include From 0861565f501ce3fcea9394d4b98c02b1f6de6b9e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 14 Feb 2014 10:29:49 +1100 Subject: [PATCH 15/20] IB/mlx5: Remove dependency on X86 Remove Kconfig dependency of mlx5_ib/mlx5_core on X86, since there is no such dependency in reality. Signed-off-by: Eli Cohen Signed-off-by: Michael Neuling Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx5/Kconfig | 2 +- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index 8e6aebfaf8a4..10df386c6344 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,6 +1,6 @@ config MLX5_INFINIBAND tristate "Mellanox Connect-IB HCA support" - depends on NETDEVICES && ETHERNET && PCI && X86 + depends on NETDEVICES && ETHERNET && PCI select NET_VENDOR_MELLANOX select MLX5_CORE ---help--- diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 157fe8df2c3e..8ff57e8e3e91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -4,5 +4,5 @@ config MLX5_CORE tristate - depends on PCI && X86 + depends on PCI default n From f809309a251a13bd97cc189c3fa428782aab9716 Mon Sep 17 00:00:00 2001 From: Upinder Malhi Date: Thu, 23 Jan 2014 22:38:04 +0000 Subject: [PATCH 16/20] IB/usnic: Fix smatch endianness error Error reported at http://marc.info/?l=linux-rdma&m=138995755801039&w=2 Fix short to int cast for big endian systems. Signed-off-by: Upinder Malhi Signed-off-by: Roland Dreier --- drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index 7ecc6061f1f4..f8dfd76be89f 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -629,6 +629,7 @@ static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow, { enum usnic_transport_type trans_type = qp_flow->trans_type; int err; + uint16_t port_num = 0; switch (trans_type) { case USNIC_TRANSPORT_ROCE_CUSTOM: @@ -637,9 +638,15 @@ static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow, case USNIC_TRANSPORT_IPV4_UDP: err = usnic_transport_sock_get_addr(qp_flow->udp.sock, NULL, NULL, - (uint16_t *) id); + &port_num); if (err) return err; + /* + * Copy port_num to stack first and then to *id, + * so that the short to int cast works for little + * and big endian systems. + */ + *id = port_num; break; default: usnic_err("Unsupported transport %u\n", trans_type); From 7d9eacf9457efc6b614665e1095336c11ad83f0d Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 4 Feb 2014 16:54:54 +0200 Subject: [PATCH 17/20] IB/iser: Avoid dereferencing iscsi_iser conn object when not bound to iser connection Fix a possible NULL pointer dereference in disconnection flow. This can happen if the target disconnected/rejected the connection request, e.g before the binding stage between iscsi connection to the transport connection. Signed-off-by: Alex Tabachnik Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_verbs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index afe95674008b..ca37edef2791 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -652,9 +652,13 @@ static int iser_disconnected_handler(struct rdma_cm_id *cma_id) /* getting here when the state is UP means that the conn is being * * terminated asynchronously from the iSCSI layer's perspective. */ if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, - ISER_CONN_TERMINATING)) - iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, - ISCSI_ERR_CONN_FAILED); + ISER_CONN_TERMINATING)){ + if (ib_conn->iser_conn) + iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, + ISCSI_ERR_CONN_FAILED); + else + iser_err("iscsi_iser connection isn't bound\n"); + } /* Complete the termination process if no posts are pending */ if (ib_conn->post_recv_buf_count == 0 && From fd8b48b22a2b7cdf21f15b01cae379e6159a7eea Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Jan 2014 16:18:51 +0300 Subject: [PATCH 18/20] IB/iser: Fix use after free in iser_snd_completion() We use "tx_desc" again after we free it. Signed-off-by: Dan Carpenter Acked-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_initiator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 538822684d5b..334f34b1cd46 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -610,11 +610,12 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc, ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); kmem_cache_free(ig.desc_cache, tx_desc); + tx_desc = NULL; } atomic_dec(&ib_conn->post_send_buf_count); - if (tx_desc->type == ISCSI_TX_CONTROL) { + if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ task = (void *) ((long)(void *)tx_desc - sizeof(struct iscsi_task)); From a61d93d92f5c9533898098abb5f187840900aeb5 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Mon, 10 Feb 2014 13:48:58 +0530 Subject: [PATCH 19/20] RDMA/ocrdma: Fix traffic class shift Use correct value for obtaining traffic class from device response for Query QP request. Signed-off-by: Devesh Sharma Reported-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index aa92f40c9d50..77e38ecd634e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1416,7 +1416,7 @@ int ocrdma_query_qp(struct ib_qp *ibqp, OCRDMA_QP_PARAMS_HOP_LMT_MASK) >> OCRDMA_QP_PARAMS_HOP_LMT_SHIFT; qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn & - OCRDMA_QP_PARAMS_SQ_PSN_MASK) >> + OCRDMA_QP_PARAMS_TCLASS_MASK) >> OCRDMA_QP_PARAMS_TCLASS_SHIFT; qp_attr->ah_attr.ah_flags = IB_AH_GRH; From 09de3f1313a30d8a22e488c9a5b96a9560cae96d Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Tue, 4 Feb 2014 12:10:48 +0530 Subject: [PATCH 20/20] RDMA/ocrdma: Fix load time panic during GID table init We should use rdma_vlan_dev_real_dev() instead of using vlan_dev_real_dev() when building the GID table for a vlan interface. Signed-off-by: Devesh Sharma Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 2ca86ca818bd..1a8a945efa60 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -127,7 +127,7 @@ static int ocrdma_addr_event(unsigned long event, struct net_device *netdev, is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN; if (is_vlan) - netdev = vlan_dev_real_dev(netdev); + netdev = rdma_vlan_dev_real_dev(netdev); rcu_read_lock(); list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) {