First batch of InfiniBand/RDMA changes for the 3.8 merge window:

- A good chunk of Bart Van Assche's SRP fixes
  - UAPI disintegration from David Howells
  - mlx4 support for "64-byte CQE" hardware feature from Or Gerlitz
  - Other miscellaneous fixes
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.11 (GNU/Linux)
 
 iQIcBAABCAAGBQJQxstjAAoJEENa44ZhAt0hURUQAJd7HumReKTdRqzIzXPc+rgl
 pRR5eqplPY2anfJMqLDiFphVjfCiKyhudomdo+RUbBFFnUVLlBzk80A0/IZ3g3PZ
 MHOT+pX4PGDd+3FQxV2AaQCMwgGbvC0haInXyQDVZGm0fbMjRd699yGVWBiA8rOI
 VNhUi5WMmynSINYokM8UxrhfoUfy3QxsOvZBZ3XUD1zjJB0IMd5HRdiDUG7ur0q+
 rfpWKv51DXT81ux36MXbdPBhLRbzx4B7EwuPWOFPqJe1KwK2cD8iA6DwEKC9KMxS
 Kj2+CxB5Bfpfz8bhLi2VZcMgAKiSIQDXUtiKz8h0yFVhvADYZLU7zdGN49mCqKcY
 9dwX8+0aIVez6WB2jH+ir2FSG65NsnvqESwQ4LLQ9bhArgf9fapVGlypHwcKi5hh
 3j2ipO/RyT56nLQeI0gz1P5mQneFSWlY96CD8WP+9OxO/mVnxViajzevSwT/cLE6
 IOMks8DPhsQK88JXSx0XKVxn3zrJ9SXbYDhRWJ6f4w/fxraRXlFdQi0UfcsAajkX
 5qmM4e8Oy97TJYiY1RkAmb7aV182xMWVjtDx2FFTQ5ukgDea/DklIM/JNQ475027
 N7zMW1tP6+gnnDyMEkteVuPdbl1fzwI3RdXCh0mFZHZ5tvegkdxbw0XxERcevnQN
 LZfME8wCuC7+RtmE38Li
 =TQK2
 -----END PGP SIGNATURE-----

Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull infiniband upate from Roland Dreier:
 "First batch of InfiniBand/RDMA changes for the 3.8 merge window:
   - A good chunk of Bart Van Assche's SRP fixes
   - UAPI disintegration from David Howells
   - mlx4 support for "64-byte CQE" hardware feature from Or Gerlitz
   - Other miscellaneous fixes"

Fix up trivial conflict in mellanox/mlx4 driver.

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (33 commits)
  RDMA/nes: Fix for crash when registering zero length MR for CQ
  RDMA/nes: Fix for terminate timer crash
  RDMA/nes: Fix for BUG_ON due to adding already-pending timer
  IB/srp: Allow SRP disconnect through sysfs
  srp_transport: Document sysfs attributes
  srp_transport: Simplify attribute initialization code
  srp_transport: Fix attribute registration
  IB/srp: Document sysfs attributes
  IB/srp: send disconnect request without waiting for CM timewait exit
  IB/srp: destroy and recreate QP and CQs when reconnecting
  IB/srp: Eliminate state SRP_TARGET_DEAD
  IB/srp: Introduce the helper function srp_remove_target()
  IB/srp: Suppress superfluous error messages
  IB/srp: Process all error completions
  IB/srp: Introduce srp_handle_qp_err()
  IB/srp: Simplify SCSI error handling
  IB/srp: Keep processing commands during host removal
  IB/srp: Eliminate state SRP_TARGET_CONNECTING
  IB/srp: Increase block layer timeout
  RDMA/cm: Change return value from find_gid_port()
  ...
This commit is contained in:
Linus Torvalds 2012-12-13 19:19:09 -08:00
commit f132c54e3a
42 changed files with 695 additions and 328 deletions

@ -0,0 +1,156 @@
What: /sys/class/infiniband_srp/srp-<hca>-<port_number>/add_target
Date: January 2, 2006
KernelVersion: 2.6.15
Contact: linux-rdma@vger.kernel.org
Description: Interface for making ib_srp connect to a new target.
One can request ib_srp to connect to a new target by writing
a comma-separated list of login parameters to this sysfs
attribute. The supported parameters are:
* id_ext, a 16-digit hexadecimal number specifying the eight
byte identifier extension in the 16-byte SRP target port
identifier. The target port identifier is sent by ib_srp
to the target in the SRP_LOGIN_REQ request.
* ioc_guid, a 16-digit hexadecimal number specifying the eight
byte I/O controller GUID portion of the 16-byte target port
identifier.
* dgid, a 32-digit hexadecimal number specifying the
destination GID.
* pkey, a four-digit hexadecimal number specifying the
InfiniBand partition key.
* service_id, a 16-digit hexadecimal number specifying the
InfiniBand service ID used to establish communication with
the SRP target. How to find out the value of the service ID
is specified in the documentation of the SRP target.
* max_sect, a decimal number specifying the maximum number of
512-byte sectors to be transferred via a single SCSI command.
* max_cmd_per_lun, a decimal number specifying the maximum
number of outstanding commands for a single LUN.
* io_class, a hexadecimal number specifying the SRP I/O class.
Must be either 0xff00 (rev 10) or 0x0100 (rev 16a). The I/O
class defines the format of the SRP initiator and target
port identifiers.
* initiator_ext, a 16-digit hexadecimal number specifying the
identifier extension portion of the SRP initiator port
identifier. This data is sent by the initiator to the target
in the SRP_LOGIN_REQ request.
* cmd_sg_entries, a number in the range 1..255 that specifies
the maximum number of data buffer descriptors stored in the
SRP_CMD information unit itself. With allow_ext_sg=0 the
parameter cmd_sg_entries defines the maximum S/G list length
for a single SRP_CMD, and commands whose S/G list length
exceeds this limit after S/G list collapsing will fail.
* allow_ext_sg, whether ib_srp is allowed to include a partial
memory descriptor list in an SRP_CMD instead of the entire
list. If a partial memory descriptor list has been included
in an SRP_CMD the remaining memory descriptors are
communicated from initiator to target via an additional RDMA
transfer. Setting allow_ext_sg to 1 increases the maximum
amount of data that can be transferred between initiator and
target via a single SCSI command. Since not all SRP target
implementations support partial memory descriptor lists the
default value for this option is 0.
* sg_tablesize, a number in the range 1..2048 specifying the
maximum S/G list length the SCSI layer is allowed to pass to
ib_srp. Specifying a value that exceeds cmd_sg_entries is
only safe with partial memory descriptor list support enabled
(allow_ext_sg=1).
What: /sys/class/infiniband_srp/srp-<hca>-<port_number>/ibdev
Date: January 2, 2006
KernelVersion: 2.6.15
Contact: linux-rdma@vger.kernel.org
Description: HCA name (<hca>).
What: /sys/class/infiniband_srp/srp-<hca>-<port_number>/port
Date: January 2, 2006
KernelVersion: 2.6.15
Contact: linux-rdma@vger.kernel.org
Description: HCA port number (<port_number>).
What: /sys/class/scsi_host/host<n>/allow_ext_sg
Date: May 19, 2011
KernelVersion: 2.6.39
Contact: linux-rdma@vger.kernel.org
Description: Whether ib_srp is allowed to include a partial memory
descriptor list in an SRP_CMD when communicating with an SRP
target.
What: /sys/class/scsi_host/host<n>/cmd_sg_entries
Date: May 19, 2011
KernelVersion: 2.6.39
Contact: linux-rdma@vger.kernel.org
Description: Maximum number of data buffer descriptors that may be sent to
the target in a single SRP_CMD request.
What: /sys/class/scsi_host/host<n>/dgid
Date: June 17, 2006
KernelVersion: 2.6.17
Contact: linux-rdma@vger.kernel.org
Description: InfiniBand destination GID used for communication with the SRP
target. Differs from orig_dgid if port redirection has happened.
What: /sys/class/scsi_host/host<n>/id_ext
Date: June 17, 2006
KernelVersion: 2.6.17
Contact: linux-rdma@vger.kernel.org
Description: Eight-byte identifier extension portion of the 16-byte target
port identifier.
What: /sys/class/scsi_host/host<n>/ioc_guid
Date: June 17, 2006
KernelVersion: 2.6.17
Contact: linux-rdma@vger.kernel.org
Description: Eight-byte I/O controller GUID portion of the 16-byte target
port identifier.
What: /sys/class/scsi_host/host<n>/local_ib_device
Date: November 29, 2006
KernelVersion: 2.6.19
Contact: linux-rdma@vger.kernel.org
Description: Name of the InfiniBand HCA used for communicating with the
SRP target.
What: /sys/class/scsi_host/host<n>/local_ib_port
Date: November 29, 2006
KernelVersion: 2.6.19
Contact: linux-rdma@vger.kernel.org
Description: Number of the HCA port used for communicating with the
SRP target.
What: /sys/class/scsi_host/host<n>/orig_dgid
Date: June 17, 2006
KernelVersion: 2.6.17
Contact: linux-rdma@vger.kernel.org
Description: InfiniBand destination GID specified in the parameters
written to the add_target sysfs attribute.
What: /sys/class/scsi_host/host<n>/pkey
Date: June 17, 2006
KernelVersion: 2.6.17
Contact: linux-rdma@vger.kernel.org
Description: A 16-bit number representing the InfiniBand partition key used
for communication with the SRP target.
What: /sys/class/scsi_host/host<n>/req_lim
Date: October 20, 2010
KernelVersion: 2.6.36
Contact: linux-rdma@vger.kernel.org
Description: Number of requests ib_srp can send to the target before it has
to wait for more credits. For more information see also the
SRP credit algorithm in the SRP specification.
What: /sys/class/scsi_host/host<n>/service_id
Date: June 17, 2006
KernelVersion: 2.6.17
Contact: linux-rdma@vger.kernel.org
Description: InfiniBand service ID used for establishing communication with
the SRP target.
What: /sys/class/scsi_host/host<n>/zero_req_lim
Date: September 20, 2006
KernelVersion: 2.6.18
Contact: linux-rdma@vger.kernel.org
Description: Number of times the initiator had to wait before sending a
request to the target because it ran out of credits. For more
information see also the SRP credit algorithm in the SRP
specification.

@ -0,0 +1,19 @@
What: /sys/class/srp_remote_ports/port-<h>:<n>/delete
Date: June 1, 2012
KernelVersion: 3.7
Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
Description: Instructs an SRP initiator to disconnect from a target and to
remove all LUNs imported from that target.
What: /sys/class/srp_remote_ports/port-<h>:<n>/port_id
Date: June 27, 2007
KernelVersion: 2.6.24
Contact: linux-scsi@vger.kernel.org
Description: 16-byte local SRP port identifier in hexadecimal format. An
example: 4c:49:4e:55:58:20:56:49:4f:00:00:00:00:00:00:00.
What: /sys/class/srp_remote_ports/port-<h>:<n>/roles
Date: June 27, 2007
KernelVersion: 2.6.24
Contact: linux-scsi@vger.kernel.org
Description: Role of the remote port. Either "SRP Initiator" or "SRP Target".

@ -345,17 +345,17 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu
err = ib_query_port(device, port_num, &props);
if (err)
return 1;
return err;
for (i = 0; i < props.gid_tbl_len; ++i) {
err = ib_query_gid(device, port_num, i, &tmp);
if (err)
return 1;
return err;
if (!memcmp(&tmp, gid, sizeof tmp))
return 0;
}
return -EAGAIN;
return -EADDRNOTAVAIL;
}
static int cma_acquire_dev(struct rdma_id_private *id_priv)
@ -388,8 +388,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
if (!ret) {
id_priv->id.port_num = port;
goto out;
} else if (ret == 1)
break;
}
}
}
}

@ -311,6 +311,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
if (cq->ibcq.event_handler)
cq->ibcq.event_handler(&ib_event,
cq->ibcq.cq_context);
break;
}
default:

@ -128,9 +128,8 @@ static void stop_ep_timer(struct iwch_ep *ep)
{
PDBG("%s ep %p\n", __func__, ep);
if (!timer_pending(&ep->timer)) {
printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n",
WARN(1, "%s timer stopped when its not running! ep %p state %u\n",
__func__, ep, ep->com.state);
WARN_ON(1);
return;
}
del_timer_sync(&ep->timer);
@ -1756,9 +1755,8 @@ static void ep_timeout(unsigned long arg)
__state_set(&ep->com, ABORTING);
break;
default:
printk(KERN_ERR "%s unexpected state ep %p state %u\n",
WARN(1, "%s unexpected state ep %p state %u\n",
__func__, ep, ep->com.state);
WARN_ON(1);
abort = 0;
}
spin_unlock_irqrestore(&ep->com.lock, flags);

@ -151,9 +151,8 @@ static void stop_ep_timer(struct c4iw_ep *ep)
{
PDBG("%s ep %p\n", __func__, ep);
if (!timer_pending(&ep->timer)) {
printk(KERN_ERR "%s timer stopped when its not running! "
WARN(1, "%s timer stopped when its not running! "
"ep %p state %u\n", __func__, ep, ep->com.state);
WARN_ON(1);
return;
}
del_timer_sync(&ep->timer);
@ -2551,9 +2550,8 @@ static void process_timeout(struct c4iw_ep *ep)
__state_set(&ep->com, ABORTING);
break;
default:
printk(KERN_ERR "%s unexpected state ep %p tid %u state %u\n",
WARN(1, "%s unexpected state ep %p tid %u state %u\n",
__func__, ep, ep->hwtid, ep->com.state);
WARN_ON(1);
abort = 0;
}
mutex_unlock(&ep->com.mutex);

@ -718,16 +718,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
if (ret)
goto done;
/*
* we ignore most issues after reporting them, but have to specially
* handle hardware-disabled chips.
*/
if (ret == 2) {
/* unique error, known to ipath_init_one */
ret = -EPERM;
goto done;
}
/*
* We could bump this to allow for full rcvegrcnt + rcvtidcnt,
* but then it no longer nicely fits power of two, and since

@ -268,15 +268,15 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
unsigned long flags;
spin_lock_irqsave(&sriov->going_down_lock, flags);
spin_lock(&sriov->id_map_lock);
spin_lock_irqsave(&sriov->going_down_lock, flags);
/*make sure that there is no schedule inside the scheduled work.*/
if (!sriov->is_going_down) {
id->scheduled_delete = 1;
schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
spin_unlock(&sriov->id_map_lock);
spin_unlock_irqrestore(&sriov->going_down_lock, flags);
spin_unlock(&sriov->id_map_lock);
}
int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,

@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
{
return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
}
static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@ -77,8 +77,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
{
struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
}
@ -99,12 +100,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
{
int err;
err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
PAGE_SIZE * 2, &buf->buf);
if (err)
goto out;
buf->entry_size = dev->dev->caps.cqe_size;
err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
&buf->mtt);
if (err)
@ -120,8 +122,7 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &buf->mtt);
err_buf:
mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
&buf->buf);
mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);
out:
return err;
@ -129,7 +130,7 @@ out:
static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
{
mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
}
static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@ -137,8 +138,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
u64 buf_addr, int cqe)
{
int err;
int cqe_size = dev->dev->caps.cqe_size;
*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
@ -331,16 +333,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
{
struct mlx4_cqe *cqe, *new_cqe;
int i;
int cqe_size = cq->buf.entry_size;
int cqe_inc = cqe_size == 64 ? 1 : 0;
i = cq->mcq.cons_index;
cqe = get_cqe(cq, i & cq->ibcq.cqe);
cqe += cqe_inc;
while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
(i + 1) & cq->resize_buf->cqe);
memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
new_cqe += cqe_inc;
new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
cqe += cqe_inc;
}
++cq->mcq.cons_index;
}
@ -438,6 +447,7 @@ err_buf:
out:
mutex_unlock(&cq->resize_mutex);
return err;
}
@ -586,6 +596,9 @@ repoll:
if (!cqe)
return -EAGAIN;
if (cq->buf.entry_size == 64)
cqe++;
++cq->mcq.cons_index;
/*
@ -807,6 +820,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
int nfreed = 0;
struct mlx4_cqe *cqe, *dest;
u8 owner_bit;
int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
/*
* First we need to find the current producer index, so we
@ -825,12 +839,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
*/
while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
cqe += cqe_inc;
if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
++nfreed;
} else if (nfreed) {
dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
dest += cqe_inc;
owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
memcpy(dest, cqe, sizeof *cqe);
dest->owner_sr_opcode = owner_bit |

@ -563,15 +563,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_ucontext *context;
struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
struct mlx4_ib_alloc_ucontext_resp resp;
int err;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
resp.qp_tab_size = dev->dev->caps.num_qps;
resp.bf_reg_size = dev->dev->caps.bf_reg_size;
resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
resp_v3.qp_tab_size = dev->dev->caps.num_qps;
resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
} else {
resp.dev_caps = dev->dev->caps.userspace_caps;
resp.qp_tab_size = dev->dev->caps.num_qps;
resp.bf_reg_size = dev->dev->caps.bf_reg_size;
resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
resp.cqe_size = dev->dev->caps.cqe_size;
}
context = kmalloc(sizeof *context, GFP_KERNEL);
if (!context)
@ -586,7 +595,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
err = ib_copy_to_udata(udata, &resp, sizeof resp);
if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
else
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (err) {
mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
kfree(context);
@ -1342,7 +1355,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->pdev->dev;
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
if (dev->caps.userspace_caps)
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
else
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
ibdev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |

@ -90,6 +90,7 @@ struct mlx4_ib_xrcd {
struct mlx4_ib_cq_buf {
struct mlx4_buf buf;
struct mlx4_mtt mtt;
int entry_size;
};
struct mlx4_ib_cq_resize {

@ -40,7 +40,9 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define MLX4_IB_UVERBS_ABI_VERSION 3
#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3
#define MLX4_IB_UVERBS_ABI_VERSION 4
/*
* Make sure that all structs defined in this file remain laid out so
@ -50,12 +52,20 @@
* instead.
*/
struct mlx4_ib_alloc_ucontext_resp {
struct mlx4_ib_alloc_ucontext_resp_v3 {
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
};
struct mlx4_ib_alloc_ucontext_resp {
__u32 dev_caps;
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
__u32 cqe_size;
};
struct mlx4_ib_alloc_pd_resp {
__u32 pdn;
__u32 reserved;

@ -532,6 +532,7 @@ void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *);
int nes_destroy_cqp(struct nes_device *);
int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
void nes_recheck_link_status(struct work_struct *work);
void nes_terminate_timeout(unsigned long context);
/* nes_nic.c */
struct net_device *nes_netdev_init(struct nes_device *, void __iomem *);

@ -629,11 +629,9 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a
case SEND_RDMA_READ_ZERO:
default:
if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) {
printk(KERN_ERR "%s[%u]: Unsupported RDMA0 len operation=%u\n",
__func__, __LINE__, cm_node->send_rdma0_op);
WARN_ON(1);
}
if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO)
WARN(1, "Unsupported RDMA0 len operation=%u\n",
cm_node->send_rdma0_op);
nes_debug(NES_DBG_CM, "Sending first rdma operation.\n");
wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
cpu_to_le32(NES_IWARP_SQ_OP_RDMAR);
@ -671,7 +669,6 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
struct nes_cm_core *cm_core = cm_node->cm_core;
struct nes_timer_entry *new_send;
int ret = 0;
u32 was_timer_set;
new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
if (!new_send)
@ -723,12 +720,8 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
}
}
was_timer_set = timer_pending(&cm_core->tcp_timer);
if (!was_timer_set) {
cm_core->tcp_timer.expires = new_send->timetosend;
add_timer(&cm_core->tcp_timer);
}
if (!timer_pending(&cm_core->tcp_timer))
mod_timer(&cm_core->tcp_timer, new_send->timetosend);
return ret;
}
@ -946,10 +939,8 @@ static void nes_cm_timer_tick(unsigned long pass)
}
if (settimer) {
if (!timer_pending(&cm_core->tcp_timer)) {
cm_core->tcp_timer.expires = nexttimeout;
add_timer(&cm_core->tcp_timer);
}
if (!timer_pending(&cm_core->tcp_timer))
mod_timer(&cm_core->tcp_timer, nexttimeout);
}
}
@ -1314,8 +1305,6 @@ static int mini_cm_del_listen(struct nes_cm_core *cm_core,
static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
struct nes_cm_node *cm_node)
{
u32 was_timer_set;
cm_node->accelerated = 1;
if (cm_node->accept_pend) {
@ -1325,11 +1314,8 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
}
was_timer_set = timer_pending(&cm_core->tcp_timer);
if (!was_timer_set) {
cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME;
add_timer(&cm_core->tcp_timer);
}
if (!timer_pending(&cm_core->tcp_timer))
mod_timer(&cm_core->tcp_timer, (jiffies + NES_SHORT_TIME));
return 0;
}

@ -75,7 +75,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
static void process_critical_error(struct nes_device *nesdev);
static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
static void nes_terminate_timeout(unsigned long context);
static void nes_terminate_start_timer(struct nes_qp *nesqp);
#ifdef CONFIG_INFINIBAND_NES_DEBUG
@ -3520,7 +3519,7 @@ static void nes_terminate_received(struct nes_device *nesdev,
}
/* Timeout routine in case terminate fails to complete */
static void nes_terminate_timeout(unsigned long context)
void nes_terminate_timeout(unsigned long context)
{
struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context;
@ -3530,11 +3529,7 @@ static void nes_terminate_timeout(unsigned long context)
/* Set a timer in case hw cannot complete the terminate sequence */
static void nes_terminate_start_timer(struct nes_qp *nesqp)
{
init_timer(&nesqp->terminate_timer);
nesqp->terminate_timer.function = nes_terminate_timeout;
nesqp->terminate_timer.expires = jiffies + HZ;
nesqp->terminate_timer.data = (unsigned long)nesqp;
add_timer(&nesqp->terminate_timer);
mod_timer(&nesqp->terminate_timer, (jiffies + HZ));
}
/**

@ -210,6 +210,9 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp
}
while (1) {
if (skb_queue_empty(&nesqp->pau_list))
goto out;
seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd);
if (seq == nextseq) {
if (skb->len || processacks)
@ -218,14 +221,13 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp
goto out;
}
if (skb->next == (struct sk_buff *)&nesqp->pau_list)
goto out;
old_skb = skb;
skb = skb->next;
skb_unlink(old_skb, &nesqp->pau_list);
nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE);
nes_rem_ref_cm_node(nesqp->cm_node);
if (skb == (struct sk_buff *)&nesqp->pau_list)
goto out;
}
return skb;
@ -245,7 +247,6 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
struct nes_rskb_cb *cb;
struct pau_fpdu_info *fpdu_info = NULL;
struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
unsigned long flags;
u32 fpdu_len = 0;
u32 tmp_len;
int frag_cnt = 0;
@ -260,12 +261,10 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
*pau_fpdu_info = NULL;
spin_lock_irqsave(&nesqp->pau_lock, flags);
skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd);
if (!skb) {
spin_unlock_irqrestore(&nesqp->pau_lock, flags);
if (!skb)
goto out;
}
cb = (struct nes_rskb_cb *)&skb->cb[0];
if (skb->len) {
fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING;
@ -290,10 +289,9 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
skb = nes_get_next_skb(nesdev, nesqp, skb,
nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd);
if (!skb) {
spin_unlock_irqrestore(&nesqp->pau_lock, flags);
if (!skb)
goto out;
} else if (rst_rcvd) {
if (rst_rcvd) {
/* rst received in the middle of fpdu */
for (; i >= 0; i--) {
skb_unlink(frags[i].skb, &nesqp->pau_list);
@ -320,8 +318,6 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
frag_cnt = 1;
}
spin_unlock_irqrestore(&nesqp->pau_lock, flags);
/* Found one */
fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC);
if (fpdu_info == NULL) {
@ -383,9 +379,8 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
if (frags[i].skb->len == 0) {
/* Pull skb off the list - it will be freed in the callback */
spin_lock_irqsave(&nesqp->pau_lock, flags);
skb_unlink(frags[i].skb, &nesqp->pau_list);
spin_unlock_irqrestore(&nesqp->pau_lock, flags);
if (!skb_queue_empty(&nesqp->pau_list))
skb_unlink(frags[i].skb, &nesqp->pau_list);
} else {
/* Last skb still has data so update the seq */
iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
@ -414,14 +409,18 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
struct pau_fpdu_info *fpdu_info;
struct nes_hw_cqp_wqe *cqp_wqe;
struct nes_cqp_request *cqp_request;
unsigned long flags;
u64 u64tmp;
u32 u32tmp;
int rc;
while (1) {
spin_lock_irqsave(&nesqp->pau_lock, flags);
rc = get_fpdu_info(nesdev, nesqp, &fpdu_info);
if (fpdu_info == NULL)
if (rc || (fpdu_info == NULL)) {
spin_unlock_irqrestore(&nesqp->pau_lock, flags);
return rc;
}
cqp_request = fpdu_info->cqp_request;
cqp_wqe = &cqp_request->cqp_wqe;
@ -447,7 +446,7 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX,
lower_32_bits(u64tmp));
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX,
upper_32_bits(u64tmp >> 32));
upper_32_bits(u64tmp));
set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
lower_32_bits(fpdu_info->frags[0].physaddr));
@ -475,6 +474,7 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
atomic_set(&cqp_request->refcount, 1);
nes_post_cqp_request(nesdev, cqp_request);
spin_unlock_irqrestore(&nesqp->pau_lock, flags);
}
return 0;
@ -649,11 +649,9 @@ static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request
nesqp = qh_chg->nesqp;
/* Should we handle the bad completion */
if (cqp_request->major_code) {
printk(KERN_ERR PFX "Invalid cqp_request major_code=0x%x\n",
if (cqp_request->major_code)
WARN(1, PFX "Invalid cqp_request major_code=0x%x\n",
cqp_request->major_code);
WARN_ON(1);
}
switch (nesqp->pau_state) {
case PAU_DEL_QH:

@ -944,12 +944,13 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
addr,
perfect_filter_register_address+(mc_index * 8),
mc_nic_index);
macaddr_high = ((u16) addr[0]) << 8;
macaddr_high += (u16) addr[1];
macaddr_low = ((u32) addr[2]) << 24;
macaddr_low += ((u32) addr[3]) << 16;
macaddr_low += ((u32) addr[4]) << 8;
macaddr_low += (u32) addr[5];
macaddr_high = ((u8) addr[0]) << 8;
macaddr_high += (u8) addr[1];
macaddr_low = ((u8) addr[2]) << 24;
macaddr_low += ((u8) addr[3]) << 16;
macaddr_low += ((u8) addr[4]) << 8;
macaddr_low += (u8) addr[5];
nes_write_indexed(nesdev,
perfect_filter_register_address+(mc_index * 8),
macaddr_low);

@ -1404,6 +1404,9 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
}
nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
init_timer(&nesqp->terminate_timer);
nesqp->terminate_timer.function = nes_terminate_timeout;
nesqp->terminate_timer.data = (unsigned long)nesqp;
/* update the QP table */
nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
@ -1413,7 +1416,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
return &nesqp->ibqp;
}
/**
* nes_clean_cq
*/
@ -2559,6 +2561,11 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ibmr;
case IWNES_MEMREG_TYPE_QP:
case IWNES_MEMREG_TYPE_CQ:
if (!region->length) {
nes_debug(NES_DBG_MR, "Unable to register zero length region for CQ\n");
ib_umem_release(region);
return ERR_PTR(-EINVAL);
}
nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
if (!nespbl) {
nes_debug(NES_DBG_MR, "Unable to allocate PBL\n");

@ -222,27 +222,29 @@ static int srp_new_cm_id(struct srp_target_port *target)
static int srp_create_target_ib(struct srp_target_port *target)
{
struct ib_qp_init_attr *init_attr;
struct ib_cq *recv_cq, *send_cq;
struct ib_qp *qp;
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
if (!init_attr)
return -ENOMEM;
target->recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0);
if (IS_ERR(target->recv_cq)) {
ret = PTR_ERR(target->recv_cq);
recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0);
if (IS_ERR(recv_cq)) {
ret = PTR_ERR(recv_cq);
goto err;
}
target->send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
srp_send_completion, NULL, target, SRP_SQ_SIZE, 0);
if (IS_ERR(target->send_cq)) {
ret = PTR_ERR(target->send_cq);
send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
srp_send_completion, NULL, target, SRP_SQ_SIZE, 0);
if (IS_ERR(send_cq)) {
ret = PTR_ERR(send_cq);
goto err_recv_cq;
}
ib_req_notify_cq(target->recv_cq, IB_CQ_NEXT_COMP);
ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
init_attr->event_handler = srp_qp_event;
init_attr->cap.max_send_wr = SRP_SQ_SIZE;
@ -251,30 +253,41 @@ static int srp_create_target_ib(struct srp_target_port *target)
init_attr->cap.max_send_sge = 1;
init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
init_attr->qp_type = IB_QPT_RC;
init_attr->send_cq = target->send_cq;
init_attr->recv_cq = target->recv_cq;
init_attr->send_cq = send_cq;
init_attr->recv_cq = recv_cq;
target->qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
if (IS_ERR(target->qp)) {
ret = PTR_ERR(target->qp);
qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_send_cq;
}
ret = srp_init_qp(target, target->qp);
ret = srp_init_qp(target, qp);
if (ret)
goto err_qp;
if (target->qp)
ib_destroy_qp(target->qp);
if (target->recv_cq)
ib_destroy_cq(target->recv_cq);
if (target->send_cq)
ib_destroy_cq(target->send_cq);
target->qp = qp;
target->recv_cq = recv_cq;
target->send_cq = send_cq;
kfree(init_attr);
return 0;
err_qp:
ib_destroy_qp(target->qp);
ib_destroy_qp(qp);
err_send_cq:
ib_destroy_cq(target->send_cq);
ib_destroy_cq(send_cq);
err_recv_cq:
ib_destroy_cq(target->recv_cq);
ib_destroy_cq(recv_cq);
err:
kfree(init_attr);
@ -289,6 +302,9 @@ static void srp_free_target_ib(struct srp_target_port *target)
ib_destroy_cq(target->send_cq);
ib_destroy_cq(target->recv_cq);
target->qp = NULL;
target->send_cq = target->recv_cq = NULL;
for (i = 0; i < SRP_RQ_SIZE; ++i)
srp_free_iu(target->srp_host, target->rx_ring[i]);
for (i = 0; i < SRP_SQ_SIZE; ++i)
@ -428,34 +444,50 @@ static int srp_send_req(struct srp_target_port *target)
return status;
}
static void srp_disconnect_target(struct srp_target_port *target)
{
/* XXX should send SRP_I_LOGOUT request */
init_completion(&target->done);
if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
shost_printk(KERN_DEBUG, target->scsi_host,
PFX "Sending CM DREQ failed\n");
return;
}
wait_for_completion(&target->done);
}
static bool srp_change_state(struct srp_target_port *target,
enum srp_target_state old,
enum srp_target_state new)
static bool srp_queue_remove_work(struct srp_target_port *target)
{
bool changed = false;
spin_lock_irq(&target->lock);
if (target->state == old) {
target->state = new;
if (target->state != SRP_TARGET_REMOVED) {
target->state = SRP_TARGET_REMOVED;
changed = true;
}
spin_unlock_irq(&target->lock);
if (changed)
queue_work(system_long_wq, &target->remove_work);
return changed;
}
static bool srp_change_conn_state(struct srp_target_port *target,
bool connected)
{
bool changed = false;
spin_lock_irq(&target->lock);
if (target->connected != connected) {
target->connected = connected;
changed = true;
}
spin_unlock_irq(&target->lock);
return changed;
}
static void srp_disconnect_target(struct srp_target_port *target)
{
if (srp_change_conn_state(target, false)) {
/* XXX should send SRP_I_LOGOUT request */
if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
shost_printk(KERN_DEBUG, target->scsi_host,
PFX "Sending CM DREQ failed\n");
}
}
}
static void srp_free_req_data(struct srp_target_port *target)
{
struct ib_device *ibdev = target->srp_host->srp_dev->dev;
@ -489,32 +521,50 @@ static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
device_remove_file(&shost->shost_dev, *attr);
}
static void srp_remove_work(struct work_struct *work)
static void srp_remove_target(struct srp_target_port *target)
{
struct srp_target_port *target =
container_of(work, struct srp_target_port, work);
if (!srp_change_state(target, SRP_TARGET_DEAD, SRP_TARGET_REMOVED))
return;
spin_lock(&target->srp_host->target_lock);
list_del(&target->list);
spin_unlock(&target->srp_host->target_lock);
WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
srp_del_scsi_host_attr(target->scsi_host);
srp_remove_host(target->scsi_host);
scsi_remove_host(target->scsi_host);
srp_disconnect_target(target);
ib_destroy_cm_id(target->cm_id);
srp_free_target_ib(target);
srp_free_req_data(target);
scsi_host_put(target->scsi_host);
}
static void srp_remove_work(struct work_struct *work)
{
struct srp_target_port *target =
container_of(work, struct srp_target_port, remove_work);
WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
spin_lock(&target->srp_host->target_lock);
list_del(&target->list);
spin_unlock(&target->srp_host->target_lock);
srp_remove_target(target);
}
static void srp_rport_delete(struct srp_rport *rport)
{
struct srp_target_port *target = rport->lld_data;
srp_queue_remove_work(target);
}
static int srp_connect_target(struct srp_target_port *target)
{
int retries = 3;
int ret;
WARN_ON_ONCE(target->connected);
target->qp_in_error = false;
ret = srp_lookup_path(target);
if (ret)
return ret;
@ -534,6 +584,7 @@ static int srp_connect_target(struct srp_target_port *target)
*/
switch (target->status) {
case 0:
srp_change_conn_state(target, true);
return 0;
case SRP_PORT_REDIRECT:
@ -646,13 +697,14 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re
static int srp_reconnect_target(struct srp_target_port *target)
{
struct ib_qp_attr qp_attr;
struct ib_wc wc;
struct Scsi_Host *shost = target->scsi_host;
int i, ret;
if (!srp_change_state(target, SRP_TARGET_LIVE, SRP_TARGET_CONNECTING))
if (target->state != SRP_TARGET_LIVE)
return -EAGAIN;
scsi_target_block(&shost->shost_gendev);
srp_disconnect_target(target);
/*
* Now get a new local CM ID so that we avoid confusing the
@ -660,21 +712,11 @@ static int srp_reconnect_target(struct srp_target_port *target)
*/
ret = srp_new_cm_id(target);
if (ret)
goto err;
goto unblock;
qp_attr.qp_state = IB_QPS_RESET;
ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE);
ret = srp_create_target_ib(target);
if (ret)
goto err;
ret = srp_init_qp(target, target->qp);
if (ret)
goto err;
while (ib_poll_cq(target->recv_cq, 1, &wc) > 0)
; /* nothing */
while (ib_poll_cq(target->send_cq, 1, &wc) > 0)
; /* nothing */
goto unblock;
for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
struct srp_request *req = &target->req_ring[i];
@ -686,13 +728,16 @@ static int srp_reconnect_target(struct srp_target_port *target)
for (i = 0; i < SRP_SQ_SIZE; ++i)
list_add(&target->tx_ring[i]->list, &target->free_tx);
target->qp_in_error = 0;
ret = srp_connect_target(target);
unblock:
scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
SDEV_TRANSPORT_OFFLINE);
if (ret)
goto err;
if (!srp_change_state(target, SRP_TARGET_CONNECTING, SRP_TARGET_LIVE))
ret = -EAGAIN;
shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n");
return ret;
@ -705,17 +750,8 @@ err:
* However, we have to defer the real removal because we
* are in the context of the SCSI error handler now, which
* will deadlock if we call scsi_remove_host().
*
* Schedule our work inside the lock to avoid a race with
* the flush_scheduled_work() in srp_remove_one().
*/
spin_lock_irq(&target->lock);
if (target->state == SRP_TARGET_CONNECTING) {
target->state = SRP_TARGET_DEAD;
INIT_WORK(&target->work, srp_remove_work);
queue_work(ib_wq, &target->work);
}
spin_unlock_irq(&target->lock);
srp_queue_remove_work(target);
return ret;
}
@ -1262,6 +1298,19 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
PFX "Recv failed with error code %d\n", res);
}
static void srp_handle_qp_err(enum ib_wc_status wc_status,
enum ib_wc_opcode wc_opcode,
struct srp_target_port *target)
{
if (target->connected && !target->qp_in_error) {
shost_printk(KERN_ERR, target->scsi_host,
PFX "failed %s status %d\n",
wc_opcode & IB_WC_RECV ? "receive" : "send",
wc_status);
}
target->qp_in_error = true;
}
static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
{
struct srp_target_port *target = target_ptr;
@ -1269,15 +1318,11 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
while (ib_poll_cq(cq, 1, &wc) > 0) {
if (wc.status) {
shost_printk(KERN_ERR, target->scsi_host,
PFX "failed receive status %d\n",
wc.status);
target->qp_in_error = 1;
break;
if (likely(wc.status == IB_WC_SUCCESS)) {
srp_handle_recv(target, &wc);
} else {
srp_handle_qp_err(wc.status, wc.opcode, target);
}
srp_handle_recv(target, &wc);
}
}
@ -1288,16 +1333,12 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
struct srp_iu *iu;
while (ib_poll_cq(cq, 1, &wc) > 0) {
if (wc.status) {
shost_printk(KERN_ERR, target->scsi_host,
PFX "failed send status %d\n",
wc.status);
target->qp_in_error = 1;
break;
if (likely(wc.status == IB_WC_SUCCESS)) {
iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
list_add(&iu->list, &target->free_tx);
} else {
srp_handle_qp_err(wc.status, wc.opcode, target);
}
iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
list_add(&iu->list, &target->free_tx);
}
}
@ -1311,16 +1352,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
unsigned long flags;
int len;
if (target->state == SRP_TARGET_CONNECTING)
goto err;
if (target->state == SRP_TARGET_DEAD ||
target->state == SRP_TARGET_REMOVED) {
scmnd->result = DID_BAD_TARGET << 16;
scmnd->scsi_done(scmnd);
return 0;
}
spin_lock_irqsave(&target->lock, flags);
iu = __srp_get_tx_iu(target, SRP_IU_CMD);
if (!iu)
@ -1377,7 +1408,6 @@ err_iu:
err_unlock:
spin_unlock_irqrestore(&target->lock, flags);
err:
return SCSI_MLQUEUE_HOST_BUSY;
}
@ -1419,6 +1449,33 @@ err:
return -ENOMEM;
}
static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
{
uint64_t T_tr_ns, max_compl_time_ms;
uint32_t rq_tmo_jiffies;
/*
* According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
* table 91), both the QP timeout and the retry count have to be set
* for RC QP's during the RTR to RTS transition.
*/
WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
(IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
/*
* Set target->rq_tmo_jiffies to one second more than the largest time
* it can take before an error completion is generated. See also
* C9-140..142 in the IBTA spec for more information about how to
* convert the QP Local ACK Timeout value to nanoseconds.
*/
T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
do_div(max_compl_time_ms, NSEC_PER_MSEC);
rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
return rq_tmo_jiffies;
}
static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
struct srp_login_rsp *lrsp,
struct srp_target_port *target)
@ -1478,6 +1535,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
if (ret)
goto error_free;
target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
if (ret)
goto error_free;
@ -1599,6 +1658,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
case IB_CM_DREQ_RECEIVED:
shost_printk(KERN_WARNING, target->scsi_host,
PFX "DREQ received - connection closed\n");
srp_change_conn_state(target, false);
if (ib_send_cm_drep(cm_id, NULL, 0))
shost_printk(KERN_ERR, target->scsi_host,
PFX "Sending CM DREP failed\n");
@ -1608,7 +1668,6 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
shost_printk(KERN_ERR, target->scsi_host,
PFX "connection closed\n");
comp = 1;
target->status = 0;
break;
@ -1636,10 +1695,6 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
struct srp_iu *iu;
struct srp_tsk_mgmt *tsk_mgmt;
if (target->state == SRP_TARGET_DEAD ||
target->state == SRP_TARGET_REMOVED)
return -1;
init_completion(&target->tsk_mgmt_done);
spin_lock_irq(&target->lock);
@ -1729,6 +1784,21 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
return ret;
}
static int srp_slave_configure(struct scsi_device *sdev)
{
struct Scsi_Host *shost = sdev->host;
struct srp_target_port *target = host_to_target(shost);
struct request_queue *q = sdev->request_queue;
unsigned long timeout;
if (sdev->type == TYPE_DISK) {
timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
blk_queue_rq_timeout(q, timeout);
}
return 0;
}
static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
char *buf)
{
@ -1861,6 +1931,7 @@ static struct scsi_host_template srp_template = {
.module = THIS_MODULE,
.name = "InfiniBand SRP initiator",
.proc_name = DRV_NAME,
.slave_configure = srp_slave_configure,
.info = srp_target_info,
.queuecommand = srp_queuecommand,
.eh_abort_handler = srp_abort,
@ -1894,11 +1965,14 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
return PTR_ERR(rport);
}
rport->lld_data = target;
spin_lock(&host->target_lock);
list_add_tail(&target->list, &host->target_list);
spin_unlock(&host->target_lock);
target->state = SRP_TARGET_LIVE;
target->connected = false;
scsi_scan_target(&target->scsi_host->shost_gendev,
0, target->scsi_id, SCAN_WILD_CARD, 0);
@ -2188,6 +2262,7 @@ static ssize_t srp_create_target(struct device *dev,
sizeof (struct srp_indirect_buf) +
target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
INIT_WORK(&target->remove_work, srp_remove_work);
spin_lock_init(&target->lock);
INIT_LIST_HEAD(&target->free_tx);
INIT_LIST_HEAD(&target->free_reqs);
@ -2232,7 +2307,6 @@ static ssize_t srp_create_target(struct device *dev,
if (ret)
goto err_free_ib;
target->qp_in_error = 0;
ret = srp_connect_target(target);
if (ret) {
shost_printk(KERN_ERR, target->scsi_host,
@ -2422,8 +2496,7 @@ static void srp_remove_one(struct ib_device *device)
{
struct srp_device *srp_dev;
struct srp_host *host, *tmp_host;
LIST_HEAD(target_list);
struct srp_target_port *target, *tmp_target;
struct srp_target_port *target;
srp_dev = ib_get_client_data(device, &srp_client);
@ -2436,35 +2509,17 @@ static void srp_remove_one(struct ib_device *device)
wait_for_completion(&host->released);
/*
* Mark all target ports as removed, so we stop queueing
* commands and don't try to reconnect.
* Remove all target ports.
*/
spin_lock(&host->target_lock);
list_for_each_entry(target, &host->target_list, list) {
spin_lock_irq(&target->lock);
target->state = SRP_TARGET_REMOVED;
spin_unlock_irq(&target->lock);
}
list_for_each_entry(target, &host->target_list, list)
srp_queue_remove_work(target);
spin_unlock(&host->target_lock);
/*
* Wait for any reconnection tasks that may have
* started before we marked our target ports as
* removed, and any target port removal tasks.
* Wait for target port removal tasks.
*/
flush_workqueue(ib_wq);
list_for_each_entry_safe(target, tmp_target,
&host->target_list, list) {
srp_del_scsi_host_attr(target->scsi_host);
srp_remove_host(target->scsi_host);
scsi_remove_host(target->scsi_host);
srp_disconnect_target(target);
ib_destroy_cm_id(target->cm_id);
srp_free_target_ib(target);
srp_free_req_data(target);
scsi_host_put(target->scsi_host);
}
flush_workqueue(system_long_wq);
kfree(host);
}
@ -2478,6 +2533,7 @@ static void srp_remove_one(struct ib_device *device)
}
static struct srp_function_template ib_srp_transport_functions = {
.rport_delete = srp_rport_delete,
};
static int __init srp_init_module(void)

@ -80,9 +80,7 @@ enum {
enum srp_target_state {
SRP_TARGET_LIVE,
SRP_TARGET_CONNECTING,
SRP_TARGET_DEAD,
SRP_TARGET_REMOVED
SRP_TARGET_REMOVED,
};
enum srp_iu_type {
@ -163,6 +161,9 @@ struct srp_target_port {
struct ib_sa_query *path_query;
int path_query_id;
u32 rq_tmo_jiffies;
bool connected;
struct ib_cm_id *cm_id;
int max_ti_iu_len;
@ -173,12 +174,12 @@ struct srp_target_port {
struct srp_iu *rx_ring[SRP_RQ_SIZE];
struct srp_request req_ring[SRP_CMD_SQ_SIZE];
struct work_struct work;
struct work_struct remove_work;
struct list_head list;
struct completion done;
int status;
int qp_in_error;
bool qp_in_error;
struct completion tsk_mgmt_done;
u8 tsk_mgmt_status;

@ -1498,6 +1498,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
u32 reply;
u8 is_going_down = 0;
int i;
unsigned long flags;
slave_state[slave].comm_toggle ^= 1;
reply = (u32) slave_state[slave].comm_toggle << 31;
@ -1576,12 +1577,12 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave);
goto reset_slave;
}
spin_lock(&priv->mfunc.master.slave_state_lock);
spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
if (!slave_state[slave].is_slave_going_down)
slave_state[slave].last_cmd = cmd;
else
is_going_down = 1;
spin_unlock(&priv->mfunc.master.slave_state_lock);
spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
if (is_going_down) {
mlx4_warn(dev, "Slave is going down aborting command(%d)"
" executing from slave:%d\n",
@ -1597,10 +1598,10 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
reset_slave:
/* cleanup any slave resources */
mlx4_delete_all_resources_for_slave(dev, slave);
spin_lock(&priv->mfunc.master.slave_state_lock);
spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
if (!slave_state[slave].is_slave_going_down)
slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET;
spin_unlock(&priv->mfunc.master.slave_state_lock);
spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
/*with slave in the middle of flr, no need to clean resources again.*/
inform_slave_state:
memset(&slave_state[slave].event_eq, 0,
@ -1755,7 +1756,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
spin_lock_init(&s_state->lock);
}
memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe));
memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size);
priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
INIT_WORK(&priv->mfunc.master.comm_work,
mlx4_master_comm_channel);

@ -51,7 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
int err;
cq->size = entries;
cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
cq->ring = ring;
cq->is_tx = mode;

@ -1604,6 +1604,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
goto out;
}
priv->rx_ring_num = prof->rx_ring_num;
priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
priv->mac_index = -1;
priv->msg_enable = MLX4_EN_MSG_LEVEL;
spin_lock_init(&priv->stats_lock);

@ -566,6 +566,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
struct ethhdr *ethh;
dma_addr_t dma;
u64 s_mac;
int factor = priv->cqe_factor;
if (!priv->port_up)
return 0;
@ -574,7 +575,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
* descriptor offset can be deduced from the CQE index instead of
* reading 'cqe->index' */
index = cq->mcq.cons_index & ring->size_mask;
cqe = &cq->buf[index];
cqe = &cq->buf[(index << factor) + factor];
/* Process all completed CQEs */
while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@ -709,7 +710,7 @@ next:
++cq->mcq.cons_index;
index = (cq->mcq.cons_index) & ring->size_mask;
cqe = &cq->buf[index];
cqe = &cq->buf[(index << factor) + factor];
if (++polled == budget)
goto out;
}

@ -315,12 +315,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
struct mlx4_cqe *buf = cq->buf;
u32 packets = 0;
u32 bytes = 0;
int factor = priv->cqe_factor;
if (!priv->port_up)
return;
index = cons_index & size_mask;
cqe = &buf[index];
cqe = &buf[(index << factor) + factor];
ring_index = ring->cons & size_mask;
/* Process all completed CQEs */
@ -349,7 +350,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
++cons_index;
index = cons_index & size_mask;
cqe = &buf[index];
cqe = &buf[(index << factor) + factor];
}

@ -101,15 +101,21 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
mb();
}
static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
{
unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;
return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
/* (entry & (eq->nent - 1)) gives us a cyclic array */
unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
/* CX3 is capable of extending the EQE from 32 to 64 bytes.
* When this feature is enabled, the first (in the lower addresses)
* 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
* contain the legacy EQE information.
*/
return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
}
static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
{
struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index);
struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
}
@ -177,7 +183,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
return;
}
memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1);
memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
s_eqe->slave_id = slave;
/* ensure all information is written before setting the ownersip bit */
wmb();
@ -401,6 +407,7 @@ void mlx4_master_handle_slave_flr(struct work_struct *work)
struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
int i;
int err;
unsigned long flags;
mlx4_dbg(dev, "mlx4_handle_slave_flr\n");
@ -412,10 +419,10 @@ void mlx4_master_handle_slave_flr(struct work_struct *work)
mlx4_delete_all_resources_for_slave(dev, i);
/*return the slave to running mode*/
spin_lock(&priv->mfunc.master.slave_state_lock);
spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
slave_state[i].last_cmd = MLX4_COMM_CMD_RESET;
slave_state[i].is_slave_going_down = 0;
spin_unlock(&priv->mfunc.master.slave_state_lock);
spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
/*notify the FW:*/
err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
@ -440,8 +447,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
u8 update_slave_state;
int i;
enum slave_port_gen_event gen_event;
unsigned long flags;
while ((eqe = next_eqe_sw(eq))) {
while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
/*
* Make sure we read EQ entry contents after we've
* checked the ownership bit.
@ -647,13 +655,13 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
} else
update_slave_state = 1;
spin_lock(&priv->mfunc.master.slave_state_lock);
spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
if (update_slave_state) {
priv->mfunc.master.slave_state[flr_slave].active = false;
priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR;
priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
}
spin_unlock(&priv->mfunc.master.slave_state_lock);
spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
queue_work(priv->mfunc.master.comm_wq,
&priv->mfunc.master.slave_flr_event_work);
break;
@ -864,7 +872,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
eq->dev = dev;
eq->nent = roundup_pow_of_two(max(nent, 2));
npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
eq->page_list = kmalloc(npages * sizeof *eq->page_list,
GFP_KERNEL);
@ -966,8 +975,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
int err;
int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
int i;
/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))

@ -110,6 +110,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
[42] = "Multicast VEP steering support",
[48] = "Counters support",
[59] = "Port management change event support",
[61] = "64 byte EQE support",
[62] = "64 byte CQE support",
};
int i;
@ -235,7 +237,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
field = dev->caps.num_ports;
MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
size = 0; /* no PF behaviour is set for now */
size = dev->caps.function_caps; /* set PF behaviours */
MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
field = 0; /* protected FMR support not available as yet */
@ -1237,6 +1239,24 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
dev->caps.eqe_size = 64;
dev->caps.eqe_factor = 1;
} else {
dev->caps.eqe_size = 32;
dev->caps.eqe_factor = 0;
}
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
dev->caps.cqe_size = 64;
dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
} else {
dev->caps.cqe_size = 32;
}
/* QPC/EEC/CQC/EQC/RDMARC attributes */
MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET);
@ -1319,6 +1339,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
struct mlx4_cmd_mailbox *mailbox;
__be32 *outbox;
int err;
u8 byte_field;
#define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04
@ -1370,6 +1391,13 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
}
/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS);
if (byte_field & 0x20) /* 64-bytes eqe enabled */
param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
if (byte_field & 0x40) /* 64-bytes cqe enabled */
param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
/* TPT attributes */
MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET);

@ -172,6 +172,7 @@ struct mlx4_init_hca_param {
u8 log_uar_sz;
u8 uar_page_sz; /* log pg sz in 4k chunks */
u8 fs_hash_enable_bits;
u64 dev_cap_enabled;
};
struct mlx4_init_ib_param {

@ -95,8 +95,14 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
" Not in use with device managed"
" flow steering");
static bool enable_64b_cqe_eqe;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
"Enable 64 byte CQEs/EQEs when the the FW supports this");
#define HCA_GLOBAL_CAP_MASK 0
#define PF_CONTEXT_BEHAVIOUR_MASK 0
#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE
static char mlx4_version[] =
DRV_NAME ": Mellanox ConnectX core driver v"
@ -386,6 +392,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
if (!enable_64b_cqe_eqe) {
if (dev_cap->flags &
(MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
}
}
if ((dev_cap->flags &
(MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
mlx4_is_master(dev))
dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
return 0;
}
/*The function checks if there are live vf, return the num of them*/
@ -599,6 +620,21 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
goto err_mem;
}
if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
dev->caps.eqe_size = 64;
dev->caps.eqe_factor = 1;
} else {
dev->caps.eqe_size = 32;
dev->caps.eqe_factor = 0;
}
if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
dev->caps.cqe_size = 64;
dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
} else {
dev->caps.cqe_size = 32;
}
return 0;
err_mem:

@ -473,6 +473,7 @@ struct mlx4_en_priv {
int mac_index;
unsigned max_mtu;
int base_qpn;
int cqe_factor;
struct mlx4_en_rss_map rss_map;
__be32 ctrl_flags;

@ -38,7 +38,7 @@ struct srp_host_attrs {
#define to_srp_host_attrs(host) ((struct srp_host_attrs *)(host)->shost_data)
#define SRP_HOST_ATTRS 0
#define SRP_RPORT_ATTRS 2
#define SRP_RPORT_ATTRS 3
struct srp_internal {
struct scsi_transport_template t;
@ -47,7 +47,6 @@ struct srp_internal {
struct device_attribute *host_attrs[SRP_HOST_ATTRS + 1];
struct device_attribute *rport_attrs[SRP_RPORT_ATTRS + 1];
struct device_attribute private_rport_attrs[SRP_RPORT_ATTRS];
struct transport_container rport_attr_cont;
};
@ -72,24 +71,6 @@ static DECLARE_TRANSPORT_CLASS(srp_host_class, "srp_host", srp_host_setup,
static DECLARE_TRANSPORT_CLASS(srp_rport_class, "srp_remote_ports",
NULL, NULL, NULL);
#define SETUP_TEMPLATE(attrb, field, perm, test, ro_test, ro_perm) \
i->private_##attrb[count] = dev_attr_##field; \
i->private_##attrb[count].attr.mode = perm; \
if (ro_test) { \
i->private_##attrb[count].attr.mode = ro_perm; \
i->private_##attrb[count].store = NULL; \
} \
i->attrb[count] = &i->private_##attrb[count]; \
if (test) \
count++
#define SETUP_RPORT_ATTRIBUTE_RD(field) \
SETUP_TEMPLATE(rport_attrs, field, S_IRUGO, 1, 0, 0)
#define SETUP_RPORT_ATTRIBUTE_RW(field) \
SETUP_TEMPLATE(rport_attrs, field, S_IRUGO | S_IWUSR, \
1, 1, S_IRUGO)
#define SRP_PID(p) \
(p)->port_id[0], (p)->port_id[1], (p)->port_id[2], (p)->port_id[3], \
(p)->port_id[4], (p)->port_id[5], (p)->port_id[6], (p)->port_id[7], \
@ -135,6 +116,24 @@ show_srp_rport_roles(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR(roles, S_IRUGO, show_srp_rport_roles, NULL);
static ssize_t store_srp_rport_delete(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct srp_rport *rport = transport_class_to_srp_rport(dev);
struct Scsi_Host *shost = dev_to_shost(dev);
struct srp_internal *i = to_srp_internal(shost->transportt);
if (i->f->rport_delete) {
i->f->rport_delete(rport);
return count;
} else {
return -ENOSYS;
}
}
static DEVICE_ATTR(delete, S_IWUSR, NULL, store_srp_rport_delete);
static void srp_rport_release(struct device *dev)
{
struct srp_rport *rport = dev_to_rport(dev);
@ -324,12 +323,16 @@ srp_attach_transport(struct srp_function_template *ft)
i->rport_attr_cont.ac.attrs = &i->rport_attrs[0];
i->rport_attr_cont.ac.class = &srp_rport_class.class;
i->rport_attr_cont.ac.match = srp_rport_match;
transport_container_register(&i->rport_attr_cont);
count = 0;
SETUP_RPORT_ATTRIBUTE_RD(port_id);
SETUP_RPORT_ATTRIBUTE_RD(roles);
i->rport_attrs[count] = NULL;
i->rport_attrs[count++] = &dev_attr_port_id;
i->rport_attrs[count++] = &dev_attr_roles;
if (ft->rport_delete)
i->rport_attrs[count++] = &dev_attr_delete;
i->rport_attrs[count++] = NULL;
BUG_ON(count > ARRAY_SIZE(i->rport_attrs));
transport_container_register(&i->rport_attr_cont);
i->f = ft;

@ -142,6 +142,8 @@ enum {
MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48,
MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55,
MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61,
MLX4_DEV_CAP_FLAG_64B_CQE = 1LL << 62
};
enum {
@ -151,6 +153,20 @@ enum {
MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3
};
enum {
MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0,
MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1
};
enum {
MLX4_USER_DEV_CAP_64B_CQE = 1L << 0
};
enum {
MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0
};
#define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90)
enum {
@ -419,6 +435,11 @@ struct mlx4_caps {
u32 max_counters;
u8 port_ib_mtu[MLX4_MAX_PORTS + 1];
u16 sqp_demux;
u32 eqe_size;
u32 cqe_size;
u8 eqe_factor;
u32 userspace_caps; /* userspace must be aware of these */
u32 function_caps; /* VFs must be aware of these */
};
struct mlx4_buf_list {

@ -1,6 +0,0 @@
header-y += ib_user_cm.h
header-y += ib_user_mad.h
header-y += ib_user_sa.h
header-y += ib_user_verbs.h
header-y += rdma_netlink.h
header-y += rdma_user_cm.h

@ -1,41 +1,9 @@
#ifndef _RDMA_NETLINK_H
#define _RDMA_NETLINK_H
#include <linux/types.h>
enum {
RDMA_NL_RDMA_CM = 1
};
#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1))
#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op)
enum {
RDMA_NL_RDMA_CM_ID_STATS = 0,
RDMA_NL_RDMA_CM_NUM_OPS
};
enum {
RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1,
RDMA_NL_RDMA_CM_ATTR_DST_ADDR,
RDMA_NL_RDMA_CM_NUM_ATTR,
};
struct rdma_cm_id_stats {
__u32 qp_num;
__u32 bound_dev_if;
__u32 port_space;
__s32 pid;
__u8 cm_state;
__u8 node_type;
__u8 port_num;
__u8 qp_type;
};
#ifdef __KERNEL__
#include <linux/netlink.h>
#include <uapi/rdma/rdma_netlink.h>
struct ibnl_client_cbs {
int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb);
@ -88,6 +56,4 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
int len, void *data, int type);
#endif /* __KERNEL__ */
#endif /* _RDMA_NETLINK_H */

@ -14,13 +14,21 @@ struct srp_rport_identifiers {
};
struct srp_rport {
/* for initiator and target drivers */
struct device dev;
u8 port_id[16];
u8 roles;
/* for initiator drivers */
void *lld_data; /* LLD private data */
};
struct srp_function_template {
/* for initiator drivers */
void (*rport_delete)(struct srp_rport *rport);
/* for target drivers */
int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int);
int (* it_nexus_response)(struct Scsi_Host *, u64, int);

@ -1 +1,7 @@
# UAPI Header export list
header-y += ib_user_cm.h
header-y += ib_user_mad.h
header-y += ib_user_sa.h
header-y += ib_user_verbs.h
header-y += rdma_netlink.h
header-y += rdma_user_cm.h

@ -0,0 +1,37 @@
#ifndef _UAPI_RDMA_NETLINK_H
#define _UAPI_RDMA_NETLINK_H
#include <linux/types.h>
enum {
RDMA_NL_RDMA_CM = 1
};
#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1))
#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op)
enum {
RDMA_NL_RDMA_CM_ID_STATS = 0,
RDMA_NL_RDMA_CM_NUM_OPS
};
enum {
RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1,
RDMA_NL_RDMA_CM_ATTR_DST_ADDR,
RDMA_NL_RDMA_CM_NUM_ATTR,
};
struct rdma_cm_id_stats {
__u32 qp_num;
__u32 bound_dev_if;
__u32 port_space;
__s32 pid;
__u8 cm_state;
__u8 node_type;
__u8 port_num;
__u8 qp_type;
};
#endif /* _UAPI_RDMA_NETLINK_H */