IB/mthca: Add device-specific support for resizing CQs

Add low-level driver support for resizing CQs (both kernel and
userspace) to mthca.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
Roland Dreier 2006-01-30 14:31:33 -08:00
parent 33b9b3ee97
commit 4885bf64bc
7 changed files with 308 additions and 52 deletions

View File

@ -1,7 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -1514,6 +1514,37 @@ int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
CMD_TIME_CLASS_A, status);
}
int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
u8 *status)
{
struct mthca_mailbox *mailbox;
__be32 *inbox;
int err;
#define RESIZE_CQ_IN_SIZE 0x40
#define RESIZE_CQ_LOG_SIZE_OFFSET 0x0c
#define RESIZE_CQ_LKEY_OFFSET 0x1c
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
inbox = mailbox->buf;
memset(inbox, 0, RESIZE_CQ_IN_SIZE);
/*
* Leave start address fields zeroed out -- mthca assumes that
* MRs for CQs always start at virtual address 0.
*/
MTHCA_PUT(inbox, log_size, RESIZE_CQ_LOG_SIZE_OFFSET);
MTHCA_PUT(inbox, lkey, RESIZE_CQ_LKEY_OFFSET);
err = mthca_cmd(dev, mailbox->dma, cq_num, 1, CMD_RESIZE_CQ,
CMD_TIME_CLASS_B, status);
mthca_free_mailbox(dev, mailbox);
return err;
}
int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int srq_num, u8 *status)
{

View File

@ -1,6 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -298,6 +299,8 @@ int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num, u8 *status);
int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int cq_num, u8 *status);
int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size,
u8 *status);
int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
int srq_num, u8 *status);
int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox,

View File

@ -1,7 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
@ -150,24 +150,29 @@ struct mthca_err_cqe {
#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24)
#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24)
static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
static inline struct mthca_cqe *get_cqe_from_buf(struct mthca_cq_buf *buf,
int entry)
{
if (cq->is_direct)
return cq->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
if (buf->is_direct)
return buf->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE);
else
return cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
return buf->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf
+ (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE;
}
static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i)
static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry)
{
return get_cqe_from_buf(&cq->buf, entry);
}
static inline struct mthca_cqe *cqe_sw(struct mthca_cqe *cqe)
{
struct mthca_cqe *cqe = get_cqe(cq, i);
return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe;
}
static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq)
{
return cqe_sw(cq, cq->cons_index & cq->ibcq.cqe);
return cqe_sw(get_cqe(cq, cq->cons_index & cq->ibcq.cqe));
}
static inline void set_cqe_hw(struct mthca_cqe *cqe)
@ -289,7 +294,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
* from our QP and therefore don't need to be checked.
*/
for (prod_index = cq->cons_index;
cqe_sw(cq, prod_index & cq->ibcq.cqe);
cqe_sw(get_cqe(cq, prod_index & cq->ibcq.cqe));
++prod_index)
if (prod_index == cq->cons_index + cq->ibcq.cqe)
break;
@ -324,6 +329,53 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
wake_up(&cq->wait);
}
void mthca_cq_resize_copy_cqes(struct mthca_cq *cq)
{
int i;
/*
* In Tavor mode, the hardware keeps the consumer and producer
* indices mod the CQ size. Since we might be making the CQ
* bigger, we need to deal with the case where the producer
* index wrapped around before the CQ was resized.
*/
if (!mthca_is_memfree(to_mdev(cq->ibcq.device)) &&
cq->ibcq.cqe < cq->resize_buf->cqe) {
cq->cons_index &= cq->ibcq.cqe;
if (cqe_sw(get_cqe(cq, cq->ibcq.cqe)))
cq->cons_index -= cq->ibcq.cqe + 1;
}
for (i = cq->cons_index; cqe_sw(get_cqe(cq, i & cq->ibcq.cqe)); ++i)
memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
i & cq->resize_buf->cqe),
get_cqe(cq, i & cq->ibcq.cqe), MTHCA_CQ_ENTRY_SIZE);
}
int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent)
{
int ret;
int i;
ret = mthca_buf_alloc(dev, nent * MTHCA_CQ_ENTRY_SIZE,
MTHCA_MAX_DIRECT_CQ_SIZE,
&buf->queue, &buf->is_direct,
&dev->driver_pd, 1, &buf->mr);
if (ret)
return ret;
for (i = 0; i < nent; ++i)
set_cqe_hw(get_cqe_from_buf(buf, i));
return 0;
}
void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe)
{
mthca_buf_free(dev, (cqe + 1) * MTHCA_CQ_ENTRY_SIZE, &buf->queue,
buf->is_direct, &buf->mr);
}
static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
struct mthca_qp *qp, int wqe_index, int is_send,
struct mthca_err_cqe *cqe,
@ -609,11 +661,14 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
spin_lock_irqsave(&cq->lock, flags);
for (npolled = 0; npolled < num_entries; ++npolled) {
npolled = 0;
repoll:
while (npolled < num_entries) {
err = mthca_poll_one(dev, cq, &qp,
&freed, entry + npolled);
if (err)
break;
++npolled;
}
if (freed) {
@ -621,6 +676,42 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
update_cons_index(dev, cq, freed);
}
/*
* If a CQ resize is in progress and we discovered that the
* old buffer is empty, then peek in the new buffer, and if
* it's not empty, switch to the new buffer and continue
* polling there.
*/
if (unlikely(err == -EAGAIN && cq->resize_buf &&
cq->resize_buf->state == CQ_RESIZE_READY)) {
/*
* In Tavor mode, the hardware keeps the producer
* index modulo the CQ size. Since we might be making
* the CQ bigger, we need to mask our consumer index
* using the size of the old CQ buffer before looking
* in the new CQ buffer.
*/
if (!mthca_is_memfree(dev))
cq->cons_index &= cq->ibcq.cqe;
if (cqe_sw(get_cqe_from_buf(&cq->resize_buf->buf,
cq->cons_index & cq->resize_buf->cqe))) {
struct mthca_cq_buf tbuf;
int tcqe;
tbuf = cq->buf;
tcqe = cq->ibcq.cqe;
cq->buf = cq->resize_buf->buf;
cq->ibcq.cqe = cq->resize_buf->cqe;
cq->resize_buf->buf = tbuf;
cq->resize_buf->cqe = tcqe;
cq->resize_buf->state = CQ_RESIZE_SWAPPED;
goto repoll;
}
}
spin_unlock_irqrestore(&cq->lock, flags);
return err == 0 || err == -EAGAIN ? npolled : err;
@ -679,22 +770,14 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
return 0;
}
static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq)
{
mthca_buf_free(dev, (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE,
&cq->queue, cq->is_direct, &cq->mr);
}
int mthca_init_cq(struct mthca_dev *dev, int nent,
struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq)
{
int size = nent * MTHCA_CQ_ENTRY_SIZE;
struct mthca_mailbox *mailbox;
struct mthca_cq_context *cq_context;
int err = -ENOMEM;
u8 status;
int i;
cq->ibcq.cqe = nent - 1;
cq->is_kernel = !ctx;
@ -732,14 +815,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
cq_context = mailbox->buf;
if (cq->is_kernel) {
err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_CQ_SIZE,
&cq->queue, &cq->is_direct,
&dev->driver_pd, 1, &cq->mr);
err = mthca_alloc_cq_buf(dev, &cq->buf, nent);
if (err)
goto err_out_mailbox;
for (i = 0; i < nent; ++i)
set_cqe_hw(get_cqe(cq, i));
}
spin_lock_init(&cq->lock);
@ -758,7 +836,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
cq_context->pd = cpu_to_be32(pdn);
cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey);
cq_context->lkey = cpu_to_be32(cq->buf.mr.ibmr.lkey);
cq_context->cqn = cpu_to_be32(cq->cqn);
if (mthca_is_memfree(dev)) {
@ -796,7 +874,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
err_out_free_mr:
if (cq->is_kernel)
mthca_free_cq_buf(dev, cq);
mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_out_mailbox:
mthca_free_mailbox(dev, mailbox);
@ -862,7 +940,7 @@ void mthca_free_cq(struct mthca_dev *dev,
wait_event(cq->wait, !atomic_read(&cq->refcount));
if (cq->is_kernel) {
mthca_free_cq_buf(dev, cq);
mthca_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
if (mthca_is_memfree(dev)) {
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);

View File

@ -1,7 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
@ -470,6 +470,9 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
enum ib_event_type event_type);
void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
struct mthca_srq *srq);
void mthca_cq_resize_copy_cqes(struct mthca_cq *cq);
int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent);
void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
struct ib_srq_attr *attr, struct mthca_srq *srq);

View File

@ -1,7 +1,7 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
@ -669,9 +669,9 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
}
if (context) {
cq->mr.ibmr.lkey = ucmd.lkey;
cq->set_ci_db_index = ucmd.set_db_index;
cq->arm_db_index = ucmd.arm_db_index;
cq->buf.mr.ibmr.lkey = ucmd.lkey;
cq->set_ci_db_index = ucmd.set_db_index;
cq->arm_db_index = ucmd.arm_db_index;
}
for (nent = 1; nent <= entries; nent <<= 1)
@ -689,6 +689,8 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
goto err_free;
}
cq->resize_buf = NULL;
return &cq->ibcq;
err_free:
@ -707,6 +709,121 @@ err_unmap_set:
return ERR_PTR(err);
}
static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
int entries)
{
int ret;
spin_lock_irq(&cq->lock);
if (cq->resize_buf) {
ret = -EBUSY;
goto unlock;
}
cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
if (!cq->resize_buf) {
ret = -ENOMEM;
goto unlock;
}
cq->resize_buf->state = CQ_RESIZE_ALLOC;
ret = 0;
unlock:
spin_unlock_irq(&cq->lock);
if (ret)
return ret;
ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
if (ret) {
spin_lock_irq(&cq->lock);
kfree(cq->resize_buf);
cq->resize_buf = NULL;
spin_unlock_irq(&cq->lock);
return ret;
}
cq->resize_buf->cqe = entries - 1;
spin_lock_irq(&cq->lock);
cq->resize_buf->state = CQ_RESIZE_READY;
spin_unlock_irq(&cq->lock);
return 0;
}
static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(ibcq->device);
struct mthca_cq *cq = to_mcq(ibcq);
struct mthca_resize_cq ucmd;
u32 lkey;
u8 status;
int ret;
if (entries < 1 || entries > dev->limits.max_cqes)
return -EINVAL;
entries = roundup_pow_of_two(entries + 1);
if (entries == ibcq->cqe + 1)
return 0;
if (cq->is_kernel) {
ret = mthca_alloc_resize_buf(dev, cq, entries);
if (ret)
return ret;
lkey = cq->resize_buf->buf.mr.ibmr.lkey;
} else {
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
return -EFAULT;
lkey = ucmd.lkey;
}
ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, long_log2(entries), &status);
if (status)
ret = -EINVAL;
if (ret) {
if (cq->resize_buf) {
mthca_free_cq_buf(dev, &cq->resize_buf->buf,
cq->resize_buf->cqe);
kfree(cq->resize_buf);
spin_lock_irq(&cq->lock);
cq->resize_buf = NULL;
spin_unlock_irq(&cq->lock);
}
return ret;
}
if (cq->is_kernel) {
struct mthca_cq_buf tbuf;
int tcqe;
spin_lock_irq(&cq->lock);
if (cq->resize_buf->state == CQ_RESIZE_READY) {
mthca_cq_resize_copy_cqes(cq);
tbuf = cq->buf;
tcqe = cq->ibcq.cqe;
cq->buf = cq->resize_buf->buf;
cq->ibcq.cqe = cq->resize_buf->cqe;
} else {
tbuf = cq->resize_buf->buf;
tcqe = cq->resize_buf->cqe;
}
kfree(cq->resize_buf);
cq->resize_buf = NULL;
spin_unlock_irq(&cq->lock);
mthca_free_cq_buf(dev, &tbuf, tcqe);
} else
ibcq->cqe = entries - 1;
return 0;
}
static int mthca_destroy_cq(struct ib_cq *cq)
{
if (cq->uobject) {
@ -1113,6 +1230,7 @@ int mthca_register_device(struct mthca_dev *dev)
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
@ -1154,6 +1272,7 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.modify_qp = mthca_modify_qp;
dev->ib_dev.destroy_qp = mthca_destroy_qp;
dev->ib_dev.create_cq = mthca_create_cq;
dev->ib_dev.resize_cq = mthca_resize_cq;
dev->ib_dev.destroy_cq = mthca_destroy_cq;
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2004 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
@ -164,9 +164,11 @@ struct mthca_ah {
* - wait_event until ref count is zero
*
* It is the consumer's responsibilty to make sure that no QP
* operations (WQE posting or state modification) are pending when the
* operations (WQE posting or state modification) are pending when a
* QP is destroyed. Also, the consumer must make sure that calls to
* qp_modify are serialized.
* qp_modify are serialized. Similarly, the consumer is responsible
* for ensuring that no CQ resize operations are pending when a CQ
* is destroyed.
*
* Possible optimizations (wait for profile data to see if/where we
* have locks bouncing between CPUs):
@ -176,25 +178,40 @@ struct mthca_ah {
* send queue and one for the receive queue)
*/
struct mthca_cq_buf {
union mthca_buf queue;
struct mthca_mr mr;
int is_direct;
};
struct mthca_cq_resize {
struct mthca_cq_buf buf;
int cqe;
enum {
CQ_RESIZE_ALLOC,
CQ_RESIZE_READY,
CQ_RESIZE_SWAPPED
} state;
};
struct mthca_cq {
struct ib_cq ibcq;
spinlock_t lock;
atomic_t refcount;
int cqn;
u32 cons_index;
int is_direct;
int is_kernel;
struct ib_cq ibcq;
spinlock_t lock;
atomic_t refcount;
int cqn;
u32 cons_index;
struct mthca_cq_buf buf;
struct mthca_cq_resize *resize_buf;
int is_kernel;
/* Next fields are Arbel only */
int set_ci_db_index;
__be32 *set_ci_db;
int arm_db_index;
__be32 *arm_db;
int arm_sn;
int set_ci_db_index;
__be32 *set_ci_db;
int arm_db_index;
__be32 *arm_db;
int arm_sn;
union mthca_buf queue;
struct mthca_mr mr;
wait_queue_head_t wait;
wait_queue_head_t wait;
};
struct mthca_srq {

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -75,6 +75,11 @@ struct mthca_create_cq_resp {
__u32 reserved;
};
struct mthca_resize_cq {
__u32 lkey;
__u32 reserved;
};
struct mthca_create_srq {
__u32 lkey;
__u32 db_index;