diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt index 864ff3283780..6d40f00b358c 100644 --- a/Documentation/infiniband/ipoib.txt +++ b/Documentation/infiniband/ipoib.txt @@ -24,6 +24,49 @@ Partitions and P_Keys The P_Key for any interface is given by the "pkey" file, and the main interface for a subinterface is in "parent." +Datagram vs Connected modes + + The IPoIB driver supports two modes of operation: datagram and + connected. The mode is set and read through an interface's + /sys/class/net//mode file. + + In datagram mode, the IB UD (Unreliable Datagram) transport is used + and so the interface MTU has is equal to the IB L2 MTU minus the + IPoIB encapsulation header (4 bytes). For example, in a typical IB + fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes. + + In connected mode, the IB RC (Reliable Connected) transport is used. + Connected mode is to takes advantage of the connected nature of the + IB transport and allows an MTU up to the maximal IP packet size of + 64K, which reduces the number of IP packets needed for handling + large UDP datagrams, TCP segments, etc and increases the performance + for large messages. + + In connected mode, the interface's UD QP is still used for multicast + and communication with peers that don't support connected mode. In + this case, RX emulation of ICMP PMTU packets is used to cause the + networking stack to use the smaller UD MTU for these neighbours. + +Stateless offloads + + If the IB HW supports IPoIB stateless offloads, IPoIB advertises + TCP/IP checksum and/or Large Send (LSO) offloading capability to the + network stack. + + Large Receive (LRO) offloading is also implemented and may be turned + on/off using ethtool calls. Currently LRO is supported only for + checksum offload capable devices. + + Stateless offloads are supported only in datagram mode. + +Interrupt moderation + + If the underlying IB device supports CQ event moderation, one can + use ethtool to set interrupt mitigation parameters and thus reduce + the overhead incurred by handling interrupts. The main code path of + IPoIB doesn't use events for TX completion signaling so only RX + moderation is supported. + Debugging Information By compiling the IPoIB driver with CONFIG_INFINIBAND_IPOIB_DEBUG set @@ -55,3 +98,5 @@ References http://ietf.org/rfc/rfc4391.txt IP over InfiniBand (IPoIB) Architecture (RFC 4392) http://ietf.org/rfc/rfc4392.txt + IP over InfiniBand: Connected Mode (RFC 4755) + http://ietf.org/rfc/rfc4755.txt diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2a2e50871b40..851de83ff455 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -297,21 +297,25 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv) id_priv->cma_dev = NULL; } -static int cma_set_qkey(struct ib_device *device, u8 port_num, - enum rdma_port_space ps, - struct rdma_dev_addr *dev_addr, u32 *qkey) +static int cma_set_qkey(struct rdma_id_private *id_priv) { struct ib_sa_mcmember_rec rec; int ret = 0; - switch (ps) { + if (id_priv->qkey) + return 0; + + switch (id_priv->id.ps) { case RDMA_PS_UDP: - *qkey = RDMA_UDP_QKEY; + id_priv->qkey = RDMA_UDP_QKEY; break; case RDMA_PS_IPOIB: - ib_addr_get_mgid(dev_addr, &rec.mgid); - ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec); - *qkey = be32_to_cpu(rec.qkey); + ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); + ret = ib_sa_get_mcmember_rec(id_priv->id.device, + id_priv->id.port_num, &rec.mgid, + &rec); + if (!ret) + id_priv->qkey = be32_to_cpu(rec.qkey); break; default: break; @@ -341,12 +345,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) ret = ib_find_cached_gid(cma_dev->device, &gid, &id_priv->id.port_num, NULL); if (!ret) { - ret = cma_set_qkey(cma_dev->device, - id_priv->id.port_num, - id_priv->id.ps, dev_addr, - &id_priv->qkey); - if (!ret) - cma_attach_to_dev(id_priv, cma_dev); + cma_attach_to_dev(id_priv, cma_dev); break; } } @@ -578,6 +577,10 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; if (cma_is_ud_ps(id_priv->id.ps)) { + ret = cma_set_qkey(id_priv); + if (ret) + return ret; + qp_attr->qkey = id_priv->qkey; *qp_attr_mask |= IB_QP_QKEY; } else { @@ -2201,6 +2204,12 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, event.status = ib_event->param.sidr_rep_rcvd.status; break; } + ret = cma_set_qkey(id_priv); + if (ret) { + event.event = RDMA_CM_EVENT_ADDR_ERROR; + event.status = -EINVAL; + break; + } if (id_priv->qkey != rep->qkey) { event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -EINVAL; @@ -2480,10 +2489,14 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, const void *private_data, int private_data_len) { struct ib_cm_sidr_rep_param rep; + int ret; memset(&rep, 0, sizeof rep); rep.status = status; if (status == IB_SIDR_SUCCESS) { + ret = cma_set_qkey(id_priv); + if (ret) + return ret; rep.qp_num = id_priv->qp_num; rep.qkey = id_priv->qkey; } @@ -2713,6 +2726,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, IB_SA_MCMEMBER_REC_FLOW_LABEL | IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; + if (id_priv->id.ps == RDMA_PS_IPOIB) + comp_mask |= IB_SA_MCMEMBER_REC_RATE | + IB_SA_MCMEMBER_REC_RATE_SELECTOR; + mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, id_priv->id.port_num, &rec, comp_mask, GFP_KERNEL, diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index a4a82bff7100..8d71086f5a1c 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -152,7 +152,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) sge_cmd = qpid << 8 | 3; wqe->sge_cmd = cpu_to_be64(sge_cmd); skb->priority = CPL_PRIORITY_CONTROL; - return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb)); + return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); } int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) @@ -571,7 +571,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) (unsigned long long) rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2); skb->priority = CPL_PRIORITY_CONTROL; - return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb)); + return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); err: kfree_skb(skb); return err; @@ -701,7 +701,7 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, u32 stag_idx; u32 wptr; - if (rdev_p->flags) + if (cxio_fatal_error(rdev_p)) return -EIO; stag_state = stag_state > 0; @@ -858,7 +858,7 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size); wqe->irs = cpu_to_be32(attr->irs); skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */ - return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb)); + return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); } void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb) @@ -1041,9 +1041,9 @@ void cxio_rdev_close(struct cxio_rdev *rdev_p) cxio_hal_pblpool_destroy(rdev_p); cxio_hal_rqtpool_destroy(rdev_p); list_del(&rdev_p->entry); - rdev_p->t3cdev_p->ulp = NULL; cxio_hal_destroy_ctrl_qp(rdev_p); cxio_hal_destroy_resource(rdev_p->rscp); + rdev_p->t3cdev_p->ulp = NULL; } } diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 094a66d1480c..bfd03bf8be54 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -115,6 +115,11 @@ struct cxio_rdev { #define CXIO_ERROR_FATAL 1 }; +static inline int cxio_fatal_error(struct cxio_rdev *rdev_p) +{ + return rdev_p->flags & CXIO_ERROR_FATAL; +} + static inline int cxio_num_stags(struct cxio_rdev *rdev_p) { return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5)); @@ -188,6 +193,7 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); void cxio_flush_hw_cq(struct t3_cq *cq); int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, u8 *cqe_flushed, u64 *cookie, u32 *credit); +int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb); #define MOD "iw_cxgb3: " #define PDBG(fmt, args...) pr_debug(MOD fmt, ## args) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 37a4fc264a07..26fc0a4eaa74 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -165,12 +165,19 @@ static void close_rnic_dev(struct t3cdev *tdev) static void iwch_err_handler(struct t3cdev *tdev, u32 status, u32 error) { struct cxio_rdev *rdev = tdev->ulp; + struct iwch_dev *rnicp = rdev_to_iwch_dev(rdev); + struct ib_event event; - if (status == OFFLOAD_STATUS_DOWN) + if (status == OFFLOAD_STATUS_DOWN) { rdev->flags = CXIO_ERROR_FATAL; - return; + event.device = &rnicp->ibdev; + event.event = IB_EVENT_DEVICE_FATAL; + event.element.port_num = 0; + ib_dispatch_event(&event); + } + return; } static int __init iwch_init_module(void) diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index 3773453b2cf0..84735506333f 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -117,6 +117,11 @@ static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) return container_of(ibdev, struct iwch_dev, ibdev); } +static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev) +{ + return container_of(rdev, struct iwch_dev, rdev); +} + static inline int t3b_device(const struct iwch_dev *rhp) { return rhp->rdev.t3cdev_p->type == T3B; diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 8699947aaf6c..fef3f1ae7225 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -139,6 +139,38 @@ static void stop_ep_timer(struct iwch_ep *ep) put_ep(&ep->com); } +int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e) +{ + int error = 0; + struct cxio_rdev *rdev; + + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + kfree_skb(skb); + return -EIO; + } + error = l2t_send(tdev, skb, l2e); + if (error) + kfree_skb(skb); + return error; +} + +int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) +{ + int error = 0; + struct cxio_rdev *rdev; + + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + kfree_skb(skb); + return -EIO; + } + error = cxgb3_ofld_send(tdev, skb); + if (error) + kfree_skb(skb); + return error; +} + static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) { struct cpl_tid_release *req; @@ -150,7 +182,7 @@ static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); skb->priority = CPL_PRIORITY_SETUP; - cxgb3_ofld_send(tdev, skb); + iwch_cxgb3_ofld_send(tdev, skb); return; } @@ -172,8 +204,7 @@ int iwch_quiesce_tid(struct iwch_ep *ep) req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); skb->priority = CPL_PRIORITY_DATA; - cxgb3_ofld_send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } int iwch_resume_tid(struct iwch_ep *ep) @@ -194,8 +225,7 @@ int iwch_resume_tid(struct iwch_ep *ep) req->val = 0; skb->priority = CPL_PRIORITY_DATA; - cxgb3_ofld_send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } static void set_emss(struct iwch_ep *ep, u16 opt) @@ -252,18 +282,22 @@ static void *alloc_ep(int size, gfp_t gfp) void __free_ep(struct kref *kref) { - struct iwch_ep_common *epc; - epc = container_of(kref, struct iwch_ep_common, kref); - PDBG("%s ep %p state %s\n", __func__, epc, states[state_read(epc)]); - kfree(epc); + struct iwch_ep *ep; + ep = container_of(container_of(kref, struct iwch_ep_common, kref), + struct iwch_ep, com); + PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); + if (ep->com.flags & RELEASE_RESOURCES) { + cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); + dst_release(ep->dst); + l2t_release(L2DATA(ep->com.tdev), ep->l2t); + } + kfree(ep); } static void release_ep_resources(struct iwch_ep *ep) { PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); - dst_release(ep->dst); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); + ep->com.flags |= RELEASE_RESOURCES; put_ep(&ep->com); } @@ -382,7 +416,7 @@ static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb) PDBG("%s t3cdev %p\n", __func__, dev); req->cmd = CPL_ABORT_NO_RST; - cxgb3_ofld_send(dev, skb); + iwch_cxgb3_ofld_send(dev, skb); } static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) @@ -402,8 +436,7 @@ static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid)); - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) @@ -424,8 +457,7 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); req->cmd = CPL_ABORT_SEND_RST; - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int send_connect(struct iwch_ep *ep) @@ -469,8 +501,7 @@ static int send_connect(struct iwch_ep *ep) req->opt0l = htonl(opt0l); req->params = 0; req->opt2 = htonl(opt2); - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) @@ -527,7 +558,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) req->sndseq = htonl(ep->snd_seq); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; - l2t_send(ep->com.tdev, skb, ep->l2t); + iwch_l2t_send(ep->com.tdev, skb, ep->l2t); start_ep_timer(ep); state_set(&ep->com, MPA_REQ_SENT); return; @@ -578,8 +609,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) req->sndseq = htonl(ep->snd_seq); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) @@ -630,8 +660,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) req->sndseq = htonl(ep->snd_seq); ep->mpa_skb = skb; state_set(&ep->com, MPA_REP_SENT); - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) @@ -795,7 +824,7 @@ static int update_rx_credits(struct iwch_ep *ep, u32 credits) OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid)); req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1)); skb->priority = CPL_PRIORITY_ACK; - cxgb3_ofld_send(ep->com.tdev, skb); + iwch_cxgb3_ofld_send(ep->com.tdev, skb); return credits; } @@ -1127,8 +1156,8 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) * We get 2 abort replies from the HW. The first one must * be ignored except for scribbling that we need one more. */ - if (!(ep->flags & ABORT_REQ_IN_PROGRESS)) { - ep->flags |= ABORT_REQ_IN_PROGRESS; + if (!(ep->com.flags & ABORT_REQ_IN_PROGRESS)) { + ep->com.flags |= ABORT_REQ_IN_PROGRESS; return CPL_RET_BUF_DONE; } @@ -1203,8 +1232,7 @@ static int listen_start(struct iwch_listen_ep *ep) req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK)); skb->priority = 1; - cxgb3_ofld_send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) @@ -1237,8 +1265,7 @@ static int listen_stop(struct iwch_listen_ep *ep) req->cpu_idx = 0; OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid)); skb->priority = 1; - cxgb3_ofld_send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb, @@ -1286,7 +1313,7 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) rpl->opt2 = htonl(opt2); rpl->rsvd = rpl->opt2; /* workaround for HW bug */ skb->priority = CPL_PRIORITY_SETUP; - l2t_send(ep->com.tdev, skb, ep->l2t); + iwch_l2t_send(ep->com.tdev, skb, ep->l2t); return; } @@ -1315,7 +1342,7 @@ static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); rpl->opt2 = 0; rpl->rsvd = rpl->opt2; - cxgb3_ofld_send(tdev, skb); + iwch_cxgb3_ofld_send(tdev, skb); } } @@ -1534,8 +1561,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) * We get 2 peer aborts from the HW. The first one must * be ignored except for scribbling that we need one more. */ - if (!(ep->flags & PEER_ABORT_IN_PROGRESS)) { - ep->flags |= PEER_ABORT_IN_PROGRESS; + if (!(ep->com.flags & PEER_ABORT_IN_PROGRESS)) { + ep->com.flags |= PEER_ABORT_IN_PROGRESS; return CPL_RET_BUF_DONE; } @@ -1613,7 +1640,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); rpl->cmd = CPL_ABORT_NO_RST; - cxgb3_ofld_send(ep->com.tdev, rpl_skb); + iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb); out: if (release) release_ep_resources(ep); @@ -2017,8 +2044,11 @@ int iwch_destroy_listen(struct iw_cm_id *cm_id) ep->com.rpl_done = 0; ep->com.rpl_err = 0; err = listen_stop(ep); + if (err) + goto done; wait_event(ep->com.waitq, ep->com.rpl_done); cxgb3_free_stid(ep->com.tdev, ep->stid); +done: err = ep->com.rpl_err; cm_id->rem_ref(cm_id); put_ep(&ep->com); @@ -2030,12 +2060,22 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) int ret=0; unsigned long flags; int close = 0; + int fatal = 0; + struct t3cdev *tdev; + struct cxio_rdev *rdev; spin_lock_irqsave(&ep->com.lock, flags); PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, states[ep->com.state], abrupt); + tdev = (struct t3cdev *)ep->com.tdev; + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + fatal = 1; + close_complete_upcall(ep); + ep->com.state = DEAD; + } switch (ep->com.state) { case MPA_REQ_WAIT: case MPA_REQ_SENT: @@ -2075,7 +2115,11 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) ret = send_abort(ep, NULL, gfp); else ret = send_halfclose(ep, gfp); + if (ret) + fatal = 1; } + if (fatal) + release_ep_resources(ep); return ret; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index d7c7e09f0996..43c0aea7eadc 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -147,6 +147,7 @@ enum iwch_ep_state { enum iwch_ep_flags { PEER_ABORT_IN_PROGRESS = (1 << 0), ABORT_REQ_IN_PROGRESS = (1 << 1), + RELEASE_RESOURCES = (1 << 2), }; struct iwch_ep_common { @@ -161,6 +162,7 @@ struct iwch_ep_common { wait_queue_head_t waitq; int rpl_done; int rpl_err; + u32 flags; }; struct iwch_listen_ep { @@ -188,7 +190,6 @@ struct iwch_ep { u16 plen; u32 ird; u32 ord; - u32 flags; }; static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index c758fbd58478..2f546a625330 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -751,7 +751,7 @@ int iwch_post_zb_read(struct iwch_qp *qhp) wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)| V_FW_RIWR_LEN(flit_cnt)); skb->priority = CPL_PRIORITY_DATA; - return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); + return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); } /* @@ -783,7 +783,7 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)); skb->priority = CPL_PRIORITY_DATA; - return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); + return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); } /* diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2ccb9d31771f..ae3d7590346e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -394,8 +394,7 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) { - /* FIXME want pgprot_writecombine() for BlueFlame pages */ - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, to_mucontext(context)->uar.pfn + diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 04b12ad23390..17621de54a9f 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -289,8 +289,8 @@ static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad) static inline void set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value) { - wqe_words[index] = cpu_to_le32((u32) ((unsigned long)value)); - wqe_words[index + 1] = cpu_to_le32((u32)(upper_32_bits((unsigned long)value))); + wqe_words[index] = cpu_to_le32((u32) value); + wqe_words[index + 1] = cpu_to_le32(upper_32_bits(value)); } static inline void diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 52425154acd4..dbd9a75474e3 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -426,6 +426,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, if (type == NES_TIMER_TYPE_CLOSE) { new_send->timetosend += (HZ/10); if (cm_node->recv_entry) { + kfree(new_send); WARN_ON(1); return -EINVAL; } @@ -445,8 +446,8 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, if (ret != NETDEV_TX_OK) { nes_debug(NES_DBG_CM, "Error sending packet %p " "(jiffies = %lu)\n", new_send, jiffies); - atomic_dec(&new_send->skb->users); new_send->timetosend = jiffies; + ret = NETDEV_TX_OK; } else { cm_packets_sent++; if (!send_retrans) { @@ -630,7 +631,6 @@ static void nes_cm_timer_tick(unsigned long pass) nes_debug(NES_DBG_CM, "rexmit failed for " "node=%p\n", cm_node); cm_packets_bounced++; - atomic_dec(&send_entry->skb->users); send_entry->retrycount--; nexttimeout = jiffies + NES_SHORT_TIME; settimer = 1; @@ -666,11 +666,6 @@ static void nes_cm_timer_tick(unsigned long pass) spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); rem_ref_cm_node(cm_node->cm_core, cm_node); - if (ret != NETDEV_TX_OK) { - nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n", - cm_node); - break; - } } if (settimer) { @@ -1262,7 +1257,6 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, cm_node->nesqp = NULL; } - cm_node->freed = 1; kfree(cm_node); return 0; } @@ -1999,13 +1993,17 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, if (loopbackremotelistener == NULL) { create_event(cm_node, NES_CM_EVENT_ABORTED); } else { - atomic_inc(&cm_loopbacks); loopback_cm_info = *cm_info; loopback_cm_info.loc_port = cm_info->rem_port; loopback_cm_info.rem_port = cm_info->loc_port; loopback_cm_info.cm_id = loopbackremotelistener->cm_id; loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info, loopbackremotelistener); + if (!loopbackremotenode) { + rem_ref_cm_node(cm_node->cm_core, cm_node); + return NULL; + } + atomic_inc(&cm_loopbacks); loopbackremotenode->loopbackpartner = cm_node; loopbackremotenode->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; @@ -2690,6 +2688,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct ib_mr *ibmr = NULL; struct ib_phys_buf ibphysbuf; struct nes_pd *nespd; + u64 tagged_offset; @@ -2755,10 +2754,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ibphysbuf.addr = nesqp->ietf_frame_pbase; ibphysbuf.size = conn_param->private_data_len + sizeof(struct ietf_mpa_frame); + tagged_offset = (u64)(unsigned long)nesqp->ietf_frame; ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd, &ibphysbuf, 1, IB_ACCESS_LOCAL_WRITE, - (u64 *)&nesqp->ietf_frame); + &tagged_offset); if (!ibmr) { nes_debug(NES_DBG_CM, "Unable to register memory region" "for lSMM for cm_node = %p \n", @@ -2782,7 +2782,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) sizeof(struct ietf_mpa_frame)); set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, - (u64)nesqp->ietf_frame); + (u64)(unsigned long)nesqp->ietf_frame); wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index d5f778202eb7..80bba1892571 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -298,7 +298,6 @@ struct nes_cm_node { struct nes_vnic *nesvnic; int apbvt_set; int accept_pend; - int freed; struct list_head timer_entry; struct list_head reset_entry; struct nes_qp *nesqp; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 52e734042b8e..d6fc9ae44062 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -46,6 +46,10 @@ static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; module_param(nes_lro_max_aggr, uint, 0444); MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation"); +static int wide_ppm_offset; +module_param(wide_ppm_offset, int, 0644); +MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm"); + static u32 crit_err_count; u32 int_mod_timer_init; u32 int_mod_cq_depth_256; @@ -546,8 +550,11 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { msleep(1); } if (int_cnt > 1) { + u32 sds; spin_lock_irqsave(&nesadapter->phy_lock, flags); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088); + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); + sds |= 0x00000040; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds); mh_detected++; reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); reset_value |= 0x0000003d; @@ -736,39 +743,49 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, { int i; u32 u32temp; - u32 serdes_common_control; + u32 sds; if (hw_rev != NE020_REV) { /* init serdes 0 */ + if (wide_ppm_offset && (nesadapter->phy_type[0] == NES_PHY_TYPE_CX4)) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA); + else + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { - serdes_common_control = nes_read_indexed(nesdev, - NES_IDX_ETH_SERDES_COMMON_CONTROL0); - serdes_common_control |= 0x000000100; - nes_write_indexed(nesdev, - NES_IDX_ETH_SERDES_COMMON_CONTROL0, - serdes_common_control); - } else if (!OneG_Mode) { + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); + sds |= 0x00000100; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds); + } + if (!OneG_Mode) nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000); + + if (port_count < 2) + return 0; + + /* init serdes 1 */ + switch (nesadapter->phy_type[1]) { + case NES_PHY_TYPE_ARGUS: + case NES_PHY_TYPE_SFP_D: + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000); + break; + case NES_PHY_TYPE_CX4: + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); + sds &= 0xFFFFFFBF; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds); + if (wide_ppm_offset) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA); + else + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF); + break; + case NES_PHY_TYPE_PUMA_1G: + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); + sds |= 0x000000100; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds); } - if (((port_count > 1) && - (nesadapter->phy_type[0] != NES_PHY_TYPE_PUMA_1G)) || - ((port_count > 2) && - (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G))) { - /* init serdes 1 */ - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF); - if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { - serdes_common_control = nes_read_indexed(nesdev, - NES_IDX_ETH_SERDES_COMMON_CONTROL1); - serdes_common_control |= 0x000000100; - nes_write_indexed(nesdev, - NES_IDX_ETH_SERDES_COMMON_CONTROL1, - serdes_common_control); - } else if (!OneG_Mode) { - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000); - } - } + if (!OneG_Mode) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000); } else { /* init serdes 0 */ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008); @@ -1259,203 +1276,162 @@ int nes_init_phy(struct nes_device *nesdev) { struct nes_adapter *nesadapter = nesdev->nesadapter; u32 counter = 0; - u32 sds_common_control0; + u32 sds; u32 mac_index = nesdev->mac_index; u32 tx_config = 0; u16 phy_data; u32 temp_phy_data = 0; u32 temp_phy_data2 = 0; - u32 i = 0; + u8 phy_type = nesadapter->phy_type[mac_index]; + u8 phy_index = nesadapter->phy_index[mac_index]; if ((nesadapter->OneG_Mode) && - (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { + (phy_type != NES_PHY_TYPE_PUMA_1G)) { nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index); - if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) { - printk(PFX "%s: Programming mdc config for 1G\n", __func__); + if (phy_type == NES_PHY_TYPE_1G) { tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); tx_config &= 0xFFFFFFE3; tx_config |= 0x04; nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); } - nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n", - nesadapter->phy_index[mac_index], phy_data); - nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000); + nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000); /* Reset the PHY */ - nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000); + nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000); udelay(100); counter = 0; do { - nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data); - if (counter++ > 100) break; + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + if (counter++ > 100) + break; } while (phy_data & 0x8000); /* Setting no phy loopback */ phy_data &= 0xbfff; phy_data |= 0x1140; - nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data); - nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data); - - nes_read_1G_phy_reg(nesdev, 0x17, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x17 = 0x%X.\n", phy_data); - - nes_read_1G_phy_reg(nesdev, 0x1e, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x1e = 0x%X.\n", phy_data); + nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data); + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data); + nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data); /* Setting the interrupt mask */ - nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data); - nes_write_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], 0xffee); - - nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data); + nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee); + nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); /* turning on flow control */ - nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data); - nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], - (phy_data & ~(0x03E0)) | 0xc00); - /* nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], - phy_data | 0xc00); */ - nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data); + nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00); + nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); - nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data); /* Clear Half duplex */ - nes_write_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], - phy_data & ~(0x0100)); - nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data); + nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100)); + nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); - nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); - nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300); - } else { - if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) || - (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { - /* setup 10G MDIO operation */ - tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); - tx_config &= 0xFFFFFFE3; - tx_config |= 0x15; - nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300); + + return 0; + } + + if ((phy_type == NES_PHY_TYPE_IRIS) || + (phy_type == NES_PHY_TYPE_ARGUS) || + (phy_type == NES_PHY_TYPE_SFP_D)) { + /* setup 10G MDIO operation */ + tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + tx_config &= 0xFFFFFFE3; + tx_config |= 0x15; + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + } + if ((phy_type == NES_PHY_TYPE_ARGUS) || + (phy_type == NES_PHY_TYPE_SFP_D)) { + /* Check firmware heartbeat */ + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + udelay(1500); + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + if (temp_phy_data != temp_phy_data2) + return 0; + + /* no heartbeat, configure the PHY */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); + if (phy_type == NES_PHY_TYPE_ARGUS) { + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008); + } else { + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038); } - if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - mdelay(10); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); - temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + /* setup LEDs */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009); - /* - * if firmware is already running (like from a - * driver un-load/load, don't do anything. - */ - if (temp_phy_data == temp_phy_data2) { - /* configure QT2505 AMCC PHY */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0x0000, 0x8000); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0000); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc302, 0x0044); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc318, 0x0052); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0001); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528); - /* - * remove micro from reset; chip boots from ROM, - * uploads EEPROM f/w image, uC executes f/w - */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0002); + /* Bring PHY out of reset */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002); - /* - * wait for heart beat to start to - * know loading is done - */ - counter = 0; - do { - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - if (counter++ > 1000) { - nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from heartbeat check \n"); - break; - } - mdelay(100); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); - temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - } while ((temp_phy_data2 == temp_phy_data)); - - /* - * wait for tracking to start to know - * f/w is good to go - */ - counter = 0; - do { - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7fd); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - if (counter++ > 1000) { - nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from status check \n"); - break; - } - mdelay(1000); - /* - * nes_debug(NES_DBG_PHY, "AMCC PHY- phy_status not ready yet = 0x%02X\n", - * temp_phy_data); - */ - } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70)); - - /* set LOS Control invert RXLOSB_I_PADINV */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd003, 0x0000); - /* set LOS Control to mask of RXLOSB_I */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc314, 0x0042); - /* set LED1 to input mode (LED1 and LED2 share same LED) */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd006, 0x0007); - /* set LED2 to RX link_status and activity */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd007, 0x000A); - /* set LED3 to RX link_status */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd008, 0x0009); - - /* - * reset the res-calibration on t2 - * serdes; ensures it is stable after - * the amcc phy is stable - */ - - sds_common_control0 = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); - sds_common_control0 |= 0x1; - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0); - - /* release the res-calibration reset */ - sds_common_control0 &= 0xfffffffe; - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0); - - i = 0; - while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) - && (i++ < 5000)) { - /* mdelay(1); */ - } - - /* - * wait for link train done before moving on, - * or will get an interupt storm - */ - counter = 0; - do { - temp_phy_data = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200 * (nesdev->mac_index & 1))); - if (counter++ > 1000) { - nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from link train wait \n"); - break; - } - mdelay(1); - } while (((temp_phy_data & 0x0f1f0000) != 0x0f0f0000)); + /* Check for heartbeat */ + counter = 0; + mdelay(690); + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + do { + if (counter++ > 150) { + nes_debug(NES_DBG_PHY, "No PHY heartbeat\n"); + break; } - } + mdelay(1); + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + } while ((temp_phy_data2 == temp_phy_data)); + + /* wait for tracking */ + counter = 0; + do { + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if (counter++ > 300) { + nes_debug(NES_DBG_PHY, "PHY did not track\n"); + break; + } + mdelay(10); + } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70)); + + /* setup signal integrity */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063); + + /* reset serdes */ + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + + mac_index * 0x200); + sds |= 0x1; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + + mac_index * 0x200, sds); + sds &= 0xfffffffe; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + + mac_index * 0x200, sds); + + counter = 0; + while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) + && (counter++ < 5000)) + ; } return 0; } @@ -2359,6 +2335,7 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) u16 temp_phy_data; u32 pcs_val = 0x0f0f0000; u32 pcs_mask = 0x0f1f0000; + u32 cdr_ctrl; spin_lock_irqsave(&nesadapter->phy_lock, flags); if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) { @@ -2473,6 +2450,7 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) break; case NES_PHY_TYPE_ARGUS: + case NES_PHY_TYPE_SFP_D: /* clear the alarms */ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008); nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001); @@ -2483,19 +2461,18 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004); nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005); /* check link status */ - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - u32temp = 100; - do { - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); - phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - if ((phy_data == temp_phy_data) || (!(--u32temp))) - break; - temp_phy_data = phy_data; - } while (1); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", - __func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP"); + __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP"); break; case NES_PHY_TYPE_PUMA_1G: @@ -2511,6 +2488,17 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) } if (phy_data & 0x0004) { + if (wide_ppm_offset && + (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) && + (nesadapter->hw_rev != NE020_REV)) { + cdr_ctrl = nes_read_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200); + nes_write_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200, + cdr_ctrl | 0x000F0000); + } nesadapter->mac_link_down[mac_index] = 0; list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n", @@ -2525,6 +2513,17 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) } } } else { + if (wide_ppm_offset && + (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) && + (nesadapter->hw_rev != NE020_REV)) { + cdr_ctrl = nes_read_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200); + nes_write_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200, + cdr_ctrl & 0xFFF0FFFF); + } nesadapter->mac_link_down[mac_index] = 1; list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n", diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index f41a8710d2a8..c3654c6383fe 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -35,12 +35,14 @@ #include +#define NES_PHY_TYPE_CX4 1 #define NES_PHY_TYPE_1G 2 #define NES_PHY_TYPE_IRIS 3 #define NES_PHY_TYPE_ARGUS 4 #define NES_PHY_TYPE_PUMA_1G 5 #define NES_PHY_TYPE_PUMA_10G 6 #define NES_PHY_TYPE_GLADIUS 7 +#define NES_PHY_TYPE_SFP_D 8 #define NES_MULTICAST_PF_MAX 8 diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index ecb1f6fd6276..c6e6611d3016 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1426,49 +1426,55 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 mac_index = nesdev->mac_index; + u8 phy_type = nesadapter->phy_type[mac_index]; + u8 phy_index = nesadapter->phy_index[mac_index]; u16 phy_data; et_cmd->duplex = DUPLEX_FULL; et_cmd->port = PORT_MII; + et_cmd->maxtxpkt = 511; + et_cmd->maxrxpkt = 511; if (nesadapter->OneG_Mode) { et_cmd->speed = SPEED_1000; - if (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) { + if (phy_type == NES_PHY_TYPE_PUMA_1G) { et_cmd->supported = SUPPORTED_1000baseT_Full; et_cmd->advertising = ADVERTISED_1000baseT_Full; et_cmd->autoneg = AUTONEG_DISABLE; et_cmd->transceiver = XCVR_INTERNAL; - et_cmd->phy_address = nesdev->mac_index; + et_cmd->phy_address = mac_index; } else { - et_cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg; - et_cmd->advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg; - nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data); + et_cmd->supported = SUPPORTED_1000baseT_Full + | SUPPORTED_Autoneg; + et_cmd->advertising = ADVERTISED_1000baseT_Full + | ADVERTISED_Autoneg; + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); if (phy_data & 0x1000) et_cmd->autoneg = AUTONEG_ENABLE; else et_cmd->autoneg = AUTONEG_DISABLE; et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; + et_cmd->phy_address = phy_index; } - } else { - if ((nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) || - (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_ARGUS)) { - et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->port = PORT_FIBRE; - et_cmd->supported = SUPPORTED_FIBRE; - et_cmd->advertising = ADVERTISED_FIBRE; - et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; - } else { - et_cmd->transceiver = XCVR_INTERNAL; - et_cmd->supported = SUPPORTED_10000baseT_Full; - et_cmd->advertising = ADVERTISED_10000baseT_Full; - et_cmd->phy_address = nesdev->mac_index; - } - et_cmd->speed = SPEED_10000; - et_cmd->autoneg = AUTONEG_DISABLE; + return 0; } - et_cmd->maxtxpkt = 511; - et_cmd->maxrxpkt = 511; + if ((phy_type == NES_PHY_TYPE_IRIS) || + (phy_type == NES_PHY_TYPE_ARGUS) || + (phy_type == NES_PHY_TYPE_SFP_D)) { + et_cmd->transceiver = XCVR_EXTERNAL; + et_cmd->port = PORT_FIBRE; + et_cmd->supported = SUPPORTED_FIBRE; + et_cmd->advertising = ADVERTISED_FIBRE; + et_cmd->phy_address = phy_index; + } else { + et_cmd->transceiver = XCVR_INTERNAL; + et_cmd->supported = SUPPORTED_10000baseT_Full; + et_cmd->advertising = ADVERTISED_10000baseT_Full; + et_cmd->phy_address = mac_index; + } + et_cmd->speed = SPEED_10000; + et_cmd->autoneg = AUTONEG_DISABLE; return 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 5a76a5510350..4c57f329dd50 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -70,12 +70,14 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) */ if (ppriv->pkey == pkey) { result = -ENOTUNIQ; + priv = NULL; goto err; } list_for_each_entry(priv, &ppriv->child_intfs, list) { if (priv->pkey == pkey) { result = -ENOTUNIQ; + priv = NULL; goto err; } } @@ -96,7 +98,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) result = ipoib_set_dev_features(priv, ppriv->ca); if (result) - goto device_init_failed; + goto err; priv->pkey = pkey; @@ -109,7 +111,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ipoib_warn(ppriv, "failed to initialize subinterface: " "device %s, port %d", ppriv->ca->name, ppriv->port); - goto device_init_failed; + goto err; } result = register_netdevice(priv->dev); @@ -146,19 +148,19 @@ sysfs_failed: register_failed: ipoib_dev_cleanup(priv->dev); -device_init_failed: - free_netdev(priv->dev); - err: mutex_unlock(&ppriv->vlan_mutex); rtnl_unlock(); + if (priv) + free_netdev(priv->dev); + return result; } int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) { struct ipoib_dev_priv *ppriv, *priv, *tpriv; - int ret = -ENOENT; + struct net_device *dev = NULL; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -172,14 +174,17 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) unregister_netdevice(priv->dev); ipoib_dev_cleanup(priv->dev); list_del(&priv->list); - free_netdev(priv->dev); - - ret = 0; + dev = priv->dev; break; } } mutex_unlock(&ppriv->vlan_mutex); rtnl_unlock(); - return ret; + if (dev) { + free_netdev(dev); + return 0; + } + + return -ENODEV; } diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c index 7cce3342ef8c..606aa58afdea 100644 --- a/drivers/net/mlx4/port.c +++ b/drivers/net/mlx4/port.c @@ -299,13 +299,14 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port) struct mlx4_cmd_mailbox *mailbox; int err; + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) + return 0; + mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memset(mailbox->buf, 0, 256); - if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) - return 0; ((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port]; err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,