hw/rdma: PVRDMA commands and data-path ops
First PVRDMA sub-module - implementation of the PVRDMA device. - PVRDMA commands such as create CQ and create MR. - Data path QP operations - post_send and post_recv. - Completion handler. Reviewed-by: Dotan Barak <dotanb@mellanox.com> Reviewed-by: Zhu Yanjun <yanjun.zhu@oracle.com> Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com> Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
This commit is contained in:
parent
ef6d4ccdc9
commit
98d176f8e5
@ -1,3 +1,5 @@
|
||||
ifeq ($(CONFIG_RDMA),y)
|
||||
obj-$(CONFIG_PCI) += rdma_utils.o rdma_backend.o rdma_rm.o
|
||||
obj-$(CONFIG_PCI) += vmw/pvrdma_dev_ring.o vmw/pvrdma_cmd.o \
|
||||
vmw/pvrdma_qp_ops.o
|
||||
endif
|
||||
|
122
hw/rdma/vmw/pvrdma.h
Normal file
122
hw/rdma/vmw/pvrdma.h
Normal file
@ -0,0 +1,122 @@
|
||||
/*
|
||||
* QEMU VMWARE paravirtual RDMA device definitions
|
||||
*
|
||||
* Copyright (C) 2018 Oracle
|
||||
* Copyright (C) 2018 Red Hat Inc
|
||||
*
|
||||
* Authors:
|
||||
* Yuval Shaia <yuval.shaia@oracle.com>
|
||||
* Marcel Apfelbaum <marcel@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef PVRDMA_PVRDMA_H
|
||||
#define PVRDMA_PVRDMA_H
|
||||
|
||||
#include <hw/pci/pci.h>
|
||||
#include <hw/pci/msix.h>
|
||||
|
||||
#include "../rdma_backend_defs.h"
|
||||
#include "../rdma_rm_defs.h"
|
||||
|
||||
#include <standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h>
|
||||
#include <standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h>
|
||||
#include "pvrdma_dev_ring.h"
|
||||
|
||||
/* BARs */
|
||||
#define RDMA_MSIX_BAR_IDX 0
|
||||
#define RDMA_REG_BAR_IDX 1
|
||||
#define RDMA_UAR_BAR_IDX 2
|
||||
#define RDMA_BAR0_MSIX_SIZE (16 * 1024)
|
||||
#define RDMA_BAR1_REGS_SIZE 256
|
||||
#define RDMA_BAR2_UAR_SIZE (0x1000 * MAX_UCS) /* each uc gets page */
|
||||
|
||||
/* MSIX */
|
||||
#define RDMA_MAX_INTRS 3
|
||||
#define RDMA_MSIX_TABLE 0x0000
|
||||
#define RDMA_MSIX_PBA 0x2000
|
||||
|
||||
/* Interrupts Vectors */
|
||||
#define INTR_VEC_CMD_RING 0
|
||||
#define INTR_VEC_CMD_ASYNC_EVENTS 1
|
||||
#define INTR_VEC_CMD_COMPLETION_Q 2
|
||||
|
||||
/* HW attributes */
|
||||
#define PVRDMA_HW_NAME "pvrdma"
|
||||
#define PVRDMA_HW_VERSION 17
|
||||
#define PVRDMA_FW_VERSION 14
|
||||
|
||||
typedef struct DSRInfo {
|
||||
dma_addr_t dma;
|
||||
struct pvrdma_device_shared_region *dsr;
|
||||
|
||||
union pvrdma_cmd_req *req;
|
||||
union pvrdma_cmd_resp *rsp;
|
||||
|
||||
struct pvrdma_ring *async_ring_state;
|
||||
PvrdmaRing async;
|
||||
|
||||
struct pvrdma_ring *cq_ring_state;
|
||||
PvrdmaRing cq;
|
||||
} DSRInfo;
|
||||
|
||||
typedef struct PVRDMADev {
|
||||
PCIDevice parent_obj;
|
||||
MemoryRegion msix;
|
||||
MemoryRegion regs;
|
||||
uint32_t regs_data[RDMA_BAR1_REGS_SIZE];
|
||||
MemoryRegion uar;
|
||||
uint32_t uar_data[RDMA_BAR2_UAR_SIZE];
|
||||
DSRInfo dsr_info;
|
||||
int interrupt_mask;
|
||||
struct ibv_device_attr dev_attr;
|
||||
uint64_t node_guid;
|
||||
char *backend_device_name;
|
||||
uint8_t backend_gid_idx;
|
||||
uint8_t backend_port_num;
|
||||
RdmaBackendDev backend_dev;
|
||||
RdmaDeviceResources rdma_dev_res;
|
||||
} PVRDMADev;
|
||||
#define PVRDMA_DEV(dev) OBJECT_CHECK(PVRDMADev, (dev), PVRDMA_HW_NAME)
|
||||
|
||||
/*
 * Read the 32-bit register stored for byte offset @addr of the register BAR.
 *
 * @dev:  device whose register shadow array is read
 * @addr: byte offset; divided by four to obtain the array index
 * @val:  receives the register content on success
 *
 * Returns 0 on success, -EINVAL if @addr lies outside the shadow array.
 */
static inline int get_reg_val(PVRDMADev *dev, hwaddr addr, uint32_t *val)
{
    /* Keep the index in the address type so a large offset cannot wrap. */
    hwaddr idx = addr >> 2;

    /* regs_data[] has RDMA_BAR1_REGS_SIZE entries; idx == size is already OOB */
    if (idx >= RDMA_BAR1_REGS_SIZE) {
        return -EINVAL;
    }

    *val = dev->regs_data[idx];

    return 0;
}
|
||||
|
||||
/*
 * Store @val in the 32-bit register kept for byte offset @addr of the
 * register BAR.
 *
 * @dev:  device whose register shadow array is written
 * @addr: byte offset; divided by four to obtain the array index
 * @val:  value to store
 *
 * Returns 0 on success, -EINVAL if @addr lies outside the shadow array.
 */
static inline int set_reg_val(PVRDMADev *dev, hwaddr addr, uint32_t val)
{
    /* Keep the index in the address type so a large offset cannot wrap. */
    hwaddr idx = addr >> 2;

    /* regs_data[] has RDMA_BAR1_REGS_SIZE entries; idx == size is already OOB */
    if (idx >= RDMA_BAR1_REGS_SIZE) {
        return -EINVAL;
    }

    dev->regs_data[idx] = val;

    return 0;
}
|
||||
|
||||
/*
 * Raise MSI-X vector @vector on @dev, unless the device's interrupt mask is
 * set, in which case nothing is signalled.
 */
static inline void post_interrupt(PVRDMADev *dev, unsigned vector)
{
    PCIDevice *pci_dev = PCI_DEVICE(dev);

    if (unlikely(dev->interrupt_mask)) {
        return;
    }

    msix_notify(pci_dev, vector);
}
|
||||
|
||||
int execute_command(PVRDMADev *dev);
|
||||
|
||||
#endif
|
673
hw/rdma/vmw/pvrdma_cmd.c
Normal file
673
hw/rdma/vmw/pvrdma_cmd.c
Normal file
@ -0,0 +1,673 @@
|
||||
/*
|
||||
* QEMU paravirtual RDMA - Command channel
|
||||
*
|
||||
* Copyright (C) 2018 Oracle
|
||||
* Copyright (C) 2018 Red Hat Inc
|
||||
*
|
||||
* Authors:
|
||||
* Yuval Shaia <yuval.shaia@oracle.com>
|
||||
* Marcel Apfelbaum <marcel@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <qemu/osdep.h>
|
||||
#include <qemu/error-report.h>
|
||||
#include <cpu.h>
|
||||
#include <linux/types.h>
|
||||
#include "hw/hw.h"
|
||||
#include "hw/pci/pci.h"
|
||||
#include "hw/pci/pci_ids.h"
|
||||
|
||||
#include "../rdma_backend.h"
|
||||
#include "../rdma_rm.h"
|
||||
#include "../rdma_utils.h"
|
||||
|
||||
#include "pvrdma.h"
|
||||
#include <standard-headers/rdma/vmw_pvrdma-abi.h>
|
||||
|
||||
/*
 * Build one contiguous host-virtual mapping out of the guest pages listed in
 * a two-level page directory.
 *
 * @pdev:     PCI device used for the DMA mappings
 * @pdir_dma: DMA address of the page directory; each directory entry is the
 *            address of a page table, each table entry the address of a page
 * @nchunks:  number of pages described by the directory
 * @length:   length in bytes of the region to map
 *
 * A range of @length bytes is reserved by remapping the first page, then
 * every further page is remapped at its fixed offset inside that range.
 *
 * Returns the host-virtual address, or NULL on failure.  On success the
 * caller owns the mapping and releases it with munmap(addr, length).
 */
static void *pvrdma_map_to_pdir(PCIDevice *pdev, uint64_t pdir_dma,
                                uint32_t nchunks, size_t length)
{
    const uint64_t entries_per_page = TARGET_PAGE_SIZE / sizeof(uint64_t);
    const uint64_t total = (uint64_t)nchunks * TARGET_PAGE_SIZE;
    uint64_t *dir, *tbl;
    int tbl_idx, dir_idx;
    uint32_t addr_idx;
    void *host_virt = NULL, *curr_page, *mapped;

    if (!nchunks) {
        pr_dbg("nchunks=0\n");
        return NULL;
    }

    /*
     * Both values come from the guest.  The pages must cover exactly the
     * page-rounded length: fewer bytes in @length would let the fixed
     * remaps below land outside the reserved range, more would leave pages
     * in the range that the guest never listed.
     */
    if (length > total || total - length >= TARGET_PAGE_SIZE) {
        error_report("PVRDMA: Invalid nchunks/length (%u, %llu)", nchunks,
                     (unsigned long long)length);
        return NULL;
    }

    /* A single directory page cannot describe more tables than it holds. */
    if (nchunks > entries_per_page * entries_per_page) {
        error_report("PVRDMA: Too many chunks (%u)", nchunks);
        return NULL;
    }

    dir = rdma_pci_dma_map(pdev, pdir_dma, TARGET_PAGE_SIZE);
    if (!dir) {
        error_report("PVRDMA: Failed to map to page directory");
        return NULL;
    }

    tbl = rdma_pci_dma_map(pdev, dir[0], TARGET_PAGE_SIZE);
    if (!tbl) {
        error_report("PVRDMA: Failed to map to page table 0");
        goto out_unmap_dir;
    }

    curr_page = rdma_pci_dma_map(pdev, (dma_addr_t)tbl[0], TARGET_PAGE_SIZE);
    if (!curr_page) {
        error_report("PVRDMA: Failed to map the first page");
        goto out_unmap_tbl;
    }

    /* old_size == 0 duplicates the mapping instead of moving it */
    host_virt = mremap(curr_page, 0, length, MREMAP_MAYMOVE);
    /* The temporary DMA mapping is no longer needed, success or not. */
    rdma_pci_dma_unmap(pdev, curr_page, TARGET_PAGE_SIZE);
    if (host_virt == MAP_FAILED) {
        host_virt = NULL;
        error_report("PVRDMA: Failed to remap memory for host_virt");
        goto out_unmap_tbl;
    }

    pr_dbg("host_virt=%p\n", host_virt);

    dir_idx = 0;
    tbl_idx = 1;
    addr_idx = 1;
    while (addr_idx < nchunks) {
        if (tbl_idx == (int)entries_per_page) {
            /* Current table exhausted: switch to the next directory entry */
            tbl_idx = 0;
            dir_idx++;
            pr_dbg("Mapping to table %d\n", dir_idx);
            rdma_pci_dma_unmap(pdev, tbl, TARGET_PAGE_SIZE);
            tbl = rdma_pci_dma_map(pdev, dir[dir_idx], TARGET_PAGE_SIZE);
            if (!tbl) {
                error_report("PVRDMA: Failed to map to page table %d", dir_idx);
                goto out_unmap_host_virt;
            }
        }

        pr_dbg("guest_dma[%u]=0x%llx\n", addr_idx,
               (unsigned long long)tbl[tbl_idx]);

        curr_page = rdma_pci_dma_map(pdev, (dma_addr_t)tbl[tbl_idx],
                                     TARGET_PAGE_SIZE);
        if (!curr_page) {
            error_report("PVRDMA: Failed to map to page %d, dir %d", tbl_idx,
                         dir_idx);
            goto out_unmap_host_virt;
        }

        mapped = mremap(curr_page, 0, TARGET_PAGE_SIZE,
                        MREMAP_MAYMOVE | MREMAP_FIXED,
                        (char *)host_virt +
                        (size_t)TARGET_PAGE_SIZE * addr_idx);

        rdma_pci_dma_unmap(pdev, curr_page, TARGET_PAGE_SIZE);

        if (mapped == MAP_FAILED) {
            error_report("PVRDMA: Failed to remap page %d, dir %d", tbl_idx,
                         dir_idx);
            goto out_unmap_host_virt;
        }

        addr_idx++;

        tbl_idx++;
    }

    goto out_unmap_tbl;

out_unmap_host_virt:
    munmap(host_virt, length);
    host_virt = NULL;

out_unmap_tbl:
    /* tbl is NULL when switching to a new table failed */
    if (tbl) {
        rdma_pci_dma_unmap(pdev, tbl, TARGET_PAGE_SIZE);
    }

out_unmap_dir:
    rdma_pci_dma_unmap(pdev, dir, TARGET_PAGE_SIZE);

    return host_virt;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_QUERY_PORT handler: query the backend port and report its
 * attributes to the guest.
 *
 * Returns 0 on success, -ENOMEM if the backend query fails (in which case
 * the response is left untouched).
 */
static int query_port(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_query_port *cmd = &req->query_port;
    struct pvrdma_cmd_query_port_resp *resp = &rsp->query_port_resp;
    struct pvrdma_port_attr attrs = {0};

    pr_dbg("port=%d\n", cmd->port_num);

    if (rdma_backend_query_port(&dev->backend_dev,
                                (struct ibv_port_attr *)&attrs) != 0) {
        return -ENOMEM;
    }

    /* Response header */
    memset(resp, 0, sizeof(*resp));
    resp->hdr.response = cmd->hdr.response;
    resp->hdr.ack = PVRDMA_CMD_QUERY_PORT_RESP;
    resp->hdr.err = 0;

    /* Values taken from the backend */
    resp->attrs.state = attrs.state;
    resp->attrs.phys_state = attrs.phys_state;
    resp->attrs.max_mtu = attrs.max_mtu;
    resp->attrs.active_mtu = attrs.active_mtu;

    /* Table sizes are capped at what the device supports */
    resp->attrs.gid_tbl_len = MIN(MAX_PORT_GIDS, attrs.gid_tbl_len);
    resp->attrs.pkey_tbl_len = MIN(MAX_PORT_PKEYS, attrs.pkey_tbl_len);

    /* Fixed values reported regardless of the backend */
    resp->attrs.max_msg_sz = 1024;
    resp->attrs.active_width = 1;
    resp->attrs.active_speed = 1;

    return 0;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_QUERY_PKEY handler: always answers with the fixed partition
 * key 0x7FFF, whatever port and index were requested.  Returns 0.
 */
static int query_pkey(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_query_pkey *cmd = &req->query_pkey;
    struct pvrdma_cmd_query_pkey_resp *resp = &rsp->query_pkey_resp;

    pr_dbg("port=%d\n", cmd->port_num);
    pr_dbg("index=%d\n", cmd->index);

    memset(resp, 0, sizeof(*resp));

    resp->hdr.response = cmd->hdr.response;
    resp->hdr.ack = PVRDMA_CMD_QUERY_PKEY_RESP;
    resp->hdr.err = 0;
    resp->pkey = 0x7FFF;

    pr_dbg("pkey=0x%x\n", resp->pkey);

    return 0;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_CREATE_PD handler: allocate a protection domain through the
 * resource manager and return its handle in the response.
 *
 * Returns the error code stored in the response header (0 on success).
 */
static int create_pd(PVRDMADev *dev, union pvrdma_cmd_req *req,
                     union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_create_pd *cmd = &req->create_pd;
    struct pvrdma_cmd_create_pd_resp *resp = &rsp->create_pd_resp;

    pr_dbg("context=0x%x\n", cmd->ctx_handle);

    memset(resp, 0, sizeof(*resp));

    resp->hdr.response = cmd->hdr.response;
    resp->hdr.ack = PVRDMA_CMD_CREATE_PD_RESP;
    resp->hdr.err = rdma_rm_alloc_pd(&dev->rdma_dev_res, &dev->backend_dev,
                                     &resp->pd_handle, cmd->ctx_handle);

    pr_dbg("ret=%d\n", resp->hdr.err);

    return resp->hdr.err;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_DESTROY_PD handler: release the protection domain named by the
 * request.  No response is written; always returns 0.
 */
static int destroy_pd(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_destroy_pd *cmd = &req->destroy_pd;

    pr_dbg("pd_handle=%d\n", cmd->pd_handle);
    rdma_rm_dealloc_pd(&dev->rdma_dev_res, cmd->pd_handle);

    return 0;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_CREATE_MR handler: register a memory region.
 *
 * Unless the request carries PVRDMA_MR_FLAG_DMA, the guest pages listed in
 * the page directory are first mapped into one contiguous host range, which
 * is then handed to the resource manager together with the MR attributes.
 *
 * Returns the error code stored in the response header (0 on success).
 */
static int create_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,
                     union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_create_mr *cmd = &req->create_mr;
    struct pvrdma_cmd_create_mr_resp *resp = &rsp->create_mr_resp;
    PCIDevice *pci_dev = PCI_DEVICE(dev);
    void *host_virt = NULL;

    memset(resp, 0, sizeof(*resp));
    resp->hdr.response = cmd->hdr.response;
    resp->hdr.ack = PVRDMA_CMD_CREATE_MR_RESP;

    pr_dbg("pd_handle=%d\n", cmd->pd_handle);
    pr_dbg("access_flags=0x%x\n", cmd->access_flags);
    pr_dbg("flags=0x%x\n", cmd->flags);

    if (!(cmd->flags & PVRDMA_MR_FLAG_DMA)) {
        host_virt = pvrdma_map_to_pdir(pci_dev, cmd->pdir_dma, cmd->nchunks,
                                       cmd->length);
        if (!host_virt) {
            pr_dbg("Failed to map to pdir\n");
            resp->hdr.err = -EINVAL;
            goto out;
        }
    }

    resp->hdr.err = rdma_rm_alloc_mr(&dev->rdma_dev_res, cmd->pd_handle,
                                     cmd->start, cmd->length, host_virt,
                                     cmd->access_flags, &resp->mr_handle,
                                     &resp->lkey, &resp->rkey);
    /*
     * The mapping backs the MR for as long as the MR exists, so it is only
     * torn down here when registration failed.  DMA MRs have no mapping.
     */
    if (resp->hdr.err && host_virt) {
        munmap(host_virt, cmd->length);
    }

out:
    pr_dbg("ret=%d\n", resp->hdr.err);
    return resp->hdr.err;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_DESTROY_MR handler: release the memory region named by the
 * request.  No response is written; always returns 0.
 */
static int destroy_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_destroy_mr *cmd = &req->destroy_mr;

    pr_dbg("mr_handle=%d\n", cmd->mr_handle);
    rdma_rm_dealloc_mr(&dev->rdma_dev_res, cmd->mr_handle);

    return 0;
}
|
||||
|
||||
/*
 * Allocate a PvrdmaRing for a completion queue and map its guest pages.
 *
 * @pci_dev:  PCI device used for the DMA mappings
 * @ring:     receives the newly allocated ring on success; not written on
 *            failure
 * @pdir_dma: DMA address of the page directory describing the ring
 * @nchunks:  number of pages in the first page table: one ring-state page
 *            followed by the data pages
 * @cqe:      number of completion entries in the ring
 *
 * The ring state used by the CQ is the second pvrdma_ring structure in the
 * ring-state page, so ring->ring_state ends up pointing one element past the
 * start of that mapping.
 *
 * Returns 0 on success or a negative errno value.
 */
static int create_cq_ring(PCIDevice *pci_dev , PvrdmaRing **ring,
                          uint64_t pdir_dma, uint32_t nchunks, uint32_t cqe)
{
    uint64_t *dir = NULL, *tbl = NULL;
    struct pvrdma_ring *state_page;
    PvrdmaRing *r;
    int rc = -EINVAL;
    char ring_name[MAX_RING_NAME_SZ];

    /*
     * nchunks is guest controlled: zero would underflow "nchunks - 1" below
     * and anything larger than one page table would read past tbl[].
     */
    if (!nchunks || nchunks > TARGET_PAGE_SIZE / sizeof(uint64_t)) {
        pr_dbg("Got invalid nchunks: %d\n", nchunks);
        return rc;
    }

    pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma);
    dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
    if (!dir) {
        pr_dbg("Failed to map to CQ page directory\n");
        goto out;
    }

    tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
    if (!tbl) {
        pr_dbg("Failed to map to CQ page table\n");
        goto out;
    }

    r = g_malloc(sizeof(*r));

    state_page = (struct pvrdma_ring *)
        rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
    r->ring_state = state_page;

    if (!state_page) {
        pr_dbg("Failed to map to CQ ring state\n");
        goto out_free_ring;
    }

    snprintf(ring_name, sizeof(ring_name), "cq_ring_%llx",
             (long long unsigned int)pdir_dma);
    rc = pvrdma_ring_init(r, ring_name, pci_dev, &state_page[1],
                          cqe, sizeof(struct pvrdma_cqe),
                          /* first page is ring state */
                          (dma_addr_t *)&tbl[1], nchunks - 1);
    if (rc) {
        goto out_unmap_ring_state;
    }

    /* Publish the ring only once it is fully set up. */
    *ring = r;

    goto out;

out_unmap_ring_state:
    /* Unmap from the start of the page, not from the slot the ring uses */
    rdma_pci_dma_unmap(pci_dev, state_page, TARGET_PAGE_SIZE);

out_free_ring:
    g_free(r);

out:
    /* The directory and table are only needed while setting up the ring. */
    rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);
    rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);

    return rc;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_CREATE_CQ handler: build the CQ ring from the guest page
 * directory and register the completion queue with the resource manager,
 * which keeps the ring as the CQ's opaque pointer.
 *
 * Returns the error code stored in the response header (0 on success).
 */
static int create_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
                     union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_create_cq *cmd = &req->create_cq;
    struct pvrdma_cmd_create_cq_resp *resp = &rsp->create_cq_resp;
    PvrdmaRing *ring = NULL;

    memset(resp, 0, sizeof(*resp));
    resp->hdr.response = cmd->hdr.response;
    resp->hdr.ack = PVRDMA_CMD_CREATE_CQ_RESP;

    resp->cqe = cmd->cqe;

    resp->hdr.err = create_cq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
                                   cmd->nchunks, cmd->cqe);
    if (resp->hdr.err) {
        goto out;
    }

    pr_dbg("ring=%p\n", ring);

    resp->hdr.err = rdma_rm_alloc_cq(&dev->rdma_dev_res, &dev->backend_dev,
                                     cmd->cqe, &resp->cq_handle, ring);
    if (resp->hdr.err) {
        /*
         * No CQ owns the ring, so release it here the same way destroy_cq()
         * does.  NOTE(review): assumes rdma_rm_alloc_cq() does not free the
         * opaque pointer on failure - confirm against rdma_rm.c.
         */
        pvrdma_ring_free(ring);
        /* ring_state was in slot 1, not 0 so need to jump back */
        rdma_pci_dma_unmap(PCI_DEVICE(dev), --ring->ring_state,
                           TARGET_PAGE_SIZE);
        g_free(ring);
    }

out:
    pr_dbg("ret=%d\n", resp->hdr.err);
    return resp->hdr.err;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_DESTROY_CQ handler: release the ring attached to the CQ and
 * then the CQ itself.
 *
 * Returns 0 on success, -EINVAL if the handle does not name a CQ.
 */
static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_destroy_cq *cmd = &req->destroy_cq;
    PvrdmaRing *cq_ring;
    RdmaRmCQ *cq;

    pr_dbg("cq_handle=%d\n", cmd->cq_handle);

    cq = rdma_rm_get_cq(&dev->rdma_dev_res, cmd->cq_handle);
    if (cq == NULL) {
        pr_dbg("Invalid CQ handle\n");
        return -EINVAL;
    }

    /* The ring was stored as the CQ's opaque pointer at creation time */
    cq_ring = (PvrdmaRing *)cq->opaque;
    pvrdma_ring_free(cq_ring);

    /*
     * The CQ uses the second ring-state slot of its page; step back to the
     * start of the mapping before unmapping it.
     */
    cq_ring->ring_state -= 1;
    rdma_pci_dma_unmap(PCI_DEVICE(dev), cq_ring->ring_state,
                       TARGET_PAGE_SIZE);
    g_free(cq_ring);

    rdma_rm_dealloc_cq(&dev->rdma_dev_res, cmd->cq_handle);

    return 0;
}
|
||||
|
||||
static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,
|
||||
PvrdmaRing **rings, uint32_t scqe, uint32_t smax_sge,
|
||||
uint32_t spages, uint32_t rcqe, uint32_t rmax_sge,
|
||||
uint32_t rpages)
|
||||
{
|
||||
uint64_t *dir = NULL, *tbl = NULL;
|
||||
PvrdmaRing *sr, *rr;
|
||||
int rc = -EINVAL;
|
||||
char ring_name[MAX_RING_NAME_SZ];
|
||||
uint32_t wqe_sz;
|
||||
|
||||
pr_dbg("pdir_dma=0x%llx\n", (long long unsigned int)pdir_dma);
|
||||
dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
|
||||
if (!dir) {
|
||||
pr_dbg("Failed to map to CQ page directory\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
|
||||
if (!tbl) {
|
||||
pr_dbg("Failed to map to CQ page table\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
sr = g_malloc(2 * sizeof(*rr));
|
||||
rr = &sr[1];
|
||||
pr_dbg("sring=%p\n", sr);
|
||||
pr_dbg("rring=%p\n", rr);
|
||||
|
||||
*rings = sr;
|
||||
|
||||
pr_dbg("scqe=%d\n", scqe);
|
||||
pr_dbg("smax_sge=%d\n", smax_sge);
|
||||
pr_dbg("spages=%d\n", spages);
|
||||
pr_dbg("rcqe=%d\n", rcqe);
|
||||
pr_dbg("rmax_sge=%d\n", rmax_sge);
|
||||
pr_dbg("rpages=%d\n", rpages);
|
||||
|
||||
/* Create send ring */
|
||||
sr->ring_state = (struct pvrdma_ring *)
|
||||
rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
|
||||
if (!sr->ring_state) {
|
||||
pr_dbg("Failed to map to CQ ring state\n");
|
||||
goto out_free_sr_mem;
|
||||
}
|
||||
|
||||
wqe_sz = pow2ceil(sizeof(struct pvrdma_sq_wqe_hdr) +
|
||||
sizeof(struct pvrdma_sge) * smax_sge - 1);
|
||||
|
||||
sprintf(ring_name, "qp_sring_%lx", pdir_dma);
|
||||
rc = pvrdma_ring_init(sr, ring_name, pci_dev, sr->ring_state,
|
||||
scqe, wqe_sz, (dma_addr_t *)&tbl[1], spages);
|
||||
if (rc) {
|
||||
goto out_unmap_ring_state;
|
||||
}
|
||||
|
||||
/* Create recv ring */
|
||||
rr->ring_state = &sr->ring_state[1];
|
||||
wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
|
||||
sizeof(struct pvrdma_sge) * rmax_sge - 1);
|
||||
sprintf(ring_name, "qp_rring_%lx", pdir_dma);
|
||||
rc = pvrdma_ring_init(rr, ring_name, pci_dev, rr->ring_state,
|
||||
rcqe, wqe_sz, (dma_addr_t *)&tbl[1 + spages], rpages);
|
||||
if (rc) {
|
||||
goto out_free_sr;
|
||||
}
|
||||
|
||||
goto out;
|
||||
|
||||
out_free_sr:
|
||||
pvrdma_ring_free(sr);
|
||||
|
||||
out_unmap_ring_state:
|
||||
rdma_pci_dma_unmap(pci_dev, sr->ring_state, TARGET_PAGE_SIZE);
|
||||
|
||||
out_free_sr_mem:
|
||||
g_free(sr);
|
||||
|
||||
out:
|
||||
rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);
|
||||
rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * PVRDMA_CMD_CREATE_QP handler: build the send/receive rings from the guest
 * page directory and register the queue pair with the resource manager,
 * which keeps the rings as the QP's opaque pointer.
 *
 * Returns the error code stored in the response header (0 on success).
 */
static int create_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
                     union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_create_qp *cmd = &req->create_qp;
    struct pvrdma_cmd_create_qp_resp *resp = &rsp->create_qp_resp;
    PvrdmaRing *rings = NULL;

    memset(resp, 0, sizeof(*resp));
    resp->hdr.response = cmd->hdr.response;
    resp->hdr.ack = PVRDMA_CMD_CREATE_QP_RESP;

    pr_dbg("total_chunks=%d\n", cmd->total_chunks);
    pr_dbg("send_chunks=%d\n", cmd->send_chunks);

    /*
     * total_chunks covers one ring-state page, the send pages and the
     * receive pages.  Reject values for which the receive page count
     * computed below would wrap around.
     */
    if (cmd->total_chunks <= cmd->send_chunks) {
        pr_dbg("Invalid chunk counts\n");
        resp->hdr.err = -EINVAL;
        goto out;
    }

    resp->hdr.err = create_qp_rings(PCI_DEVICE(dev), cmd->pdir_dma, &rings,
                                    cmd->max_send_wr, cmd->max_send_sge,
                                    cmd->send_chunks, cmd->max_recv_wr,
                                    cmd->max_recv_sge, cmd->total_chunks -
                                    cmd->send_chunks - 1);
    if (resp->hdr.err) {
        goto out;
    }

    pr_dbg("rings=%p\n", rings);

    resp->hdr.err = rdma_rm_alloc_qp(&dev->rdma_dev_res, cmd->pd_handle,
                                     cmd->qp_type, cmd->max_send_wr,
                                     cmd->max_send_sge, cmd->send_cq_handle,
                                     cmd->max_recv_wr, cmd->max_recv_sge,
                                     cmd->recv_cq_handle, rings, &resp->qpn);
    if (resp->hdr.err) {
        /*
         * No QP owns the rings, so release them here the same way
         * destroy_qp() does.  NOTE(review): assumes rdma_rm_alloc_qp() does
         * not free the opaque pointer on failure - confirm against
         * rdma_rm.c.
         */
        pvrdma_ring_free(&rings[0]);
        pvrdma_ring_free(&rings[1]);
        rdma_pci_dma_unmap(PCI_DEVICE(dev), rings->ring_state,
                           TARGET_PAGE_SIZE);
        g_free(rings);
    }

    /* Echo the requested capabilities back to the guest */
    resp->max_send_wr = cmd->max_send_wr;
    resp->max_recv_wr = cmd->max_recv_wr;
    resp->max_send_sge = cmd->max_send_sge;
    resp->max_recv_sge = cmd->max_recv_sge;
    resp->max_inline_data = cmd->max_inline_data;

out:
    pr_dbg("ret=%d\n", resp->hdr.err);
    return resp->hdr.err;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_MODIFY_QP handler: forward the requested attribute changes to
 * the resource manager.
 *
 * Returns the error code stored in the response header (0 on success).
 */
static int modify_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
                     union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_modify_qp *cmd = &req->modify_qp;
    union ibv_gid *dgid;

    pr_dbg("qp_handle=%d\n", cmd->qp_handle);

    /* This command has no dedicated response type: clear the whole union */
    memset(rsp, 0, sizeof(*rsp));
    rsp->hdr.response = cmd->hdr.response;
    rsp->hdr.ack = PVRDMA_CMD_MODIFY_QP_RESP;

    dgid = (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid;
    rsp->hdr.err = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev,
                                     cmd->qp_handle, cmd->attr_mask, dgid,
                                     cmd->attrs.dest_qp_num,
                                     cmd->attrs.qp_state, cmd->attrs.qkey,
                                     cmd->attrs.rq_psn, cmd->attrs.sq_psn);

    pr_dbg("ret=%d\n", rsp->hdr.err);

    return rsp->hdr.err;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_DESTROY_QP handler: release the queue pair and then the two
 * rings that were attached to it at creation time.
 *
 * Returns 0 on success, -EINVAL if the handle does not name a QP.
 */
static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_destroy_qp *cmd = &req->destroy_qp;
    RdmaRmQP *qp;
    PvrdmaRing *ring;

    qp = rdma_rm_get_qp(&dev->rdma_dev_res, cmd->qp_handle);
    if (!qp) {
        pr_dbg("Invalid QP handle\n");
        return -EINVAL;
    }

    /*
     * Fetch the rings before the QP is released: once it has been handed
     * back to the resource manager, qp must not be dereferenced any more.
     */
    ring = (PvrdmaRing *)qp->opaque;

    rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);

    pr_dbg("sring=%p\n", &ring[0]);
    pvrdma_ring_free(&ring[0]);
    pr_dbg("rring=%p\n", &ring[1]);
    pvrdma_ring_free(&ring[1]);

    /* Both rings share one ring-state page, mapped through the send ring */
    rdma_pci_dma_unmap(PCI_DEVICE(dev), ring->ring_state, TARGET_PAGE_SIZE);
    g_free(ring);

    return 0;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_CREATE_BIND handler: store the GID supplied by the guest in
 * the GID table of port 0 at the requested index.
 *
 * Returns 0 on success, -EINVAL if the index is outside the GID table.
 */
static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
                       union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_create_bind *cmd = &req->create_bind;
#ifdef PVRDMA_DEBUG
    __be64 *subnet = (__be64 *)&cmd->new_gid[0];
    __be64 *if_id = (__be64 *)&cmd->new_gid[8];
#endif

    pr_dbg("index=%d\n", cmd->index);

    /* Valid indices are 0 .. MAX_PORT_GIDS - 1 */
    if (cmd->index >= MAX_PORT_GIDS) {
        return -EINVAL;
    }

    pr_dbg("gid[%d]=0x%llx,0x%llx\n", cmd->index,
           (long long unsigned int)be64_to_cpu(*subnet),
           (long long unsigned int)be64_to_cpu(*if_id));

    /* Driver forces to one port only */
    memcpy(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, &cmd->new_gid,
           sizeof(cmd->new_gid));

    /*
     * TODO: the driver stores node_guid at load_dsr time, so this assignment
     * has no effect on it; a way to retrieve the MAC of our netdev is still
     * needed.
     */
    dev->node_guid = dev->rdma_dev_res.ports[0].gid_tbl[0].global.interface_id;
    pr_dbg("dev->node_guid=0x%llx\n",
           (long long unsigned int)be64_to_cpu(dev->node_guid));

    return 0;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_DESTROY_BIND handler: clear the GID table entry of port 0 at
 * the requested index.
 *
 * Returns 0 on success, -EINVAL if the index is outside the GID table.
 */
static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
                        union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_destroy_bind *cmd = &req->destroy_bind;

    pr_dbg("clear index %d\n", cmd->index);

    /* The index comes from the guest: never write outside the table */
    if (cmd->index >= MAX_PORT_GIDS) {
        return -EINVAL;
    }

    memset(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, 0,
           sizeof(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw));

    return 0;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_CREATE_UC handler: allocate a user context through the
 * resource manager and return its handle in the response.
 *
 * Returns the error code stored in the response header (0 on success), like
 * the other create_* handlers, so that execute_command() reports a failed
 * allocation to the guest.
 */
static int create_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
                     union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_create_uc *cmd = &req->create_uc;
    struct pvrdma_cmd_create_uc_resp *resp = &rsp->create_uc_resp;

    pr_dbg("pfn=%d\n", cmd->pfn);

    memset(resp, 0, sizeof(*resp));
    resp->hdr.response = cmd->hdr.response;
    resp->hdr.ack = PVRDMA_CMD_CREATE_UC_RESP;
    resp->hdr.err = rdma_rm_alloc_uc(&dev->rdma_dev_res, cmd->pfn,
                                     &resp->ctx_handle);

    pr_dbg("ret=%d\n", resp->hdr.err);

    return resp->hdr.err;
}
|
||||
|
||||
/*
 * PVRDMA_CMD_DESTROY_UC handler: release the user context named by the
 * request.  No response is written; always returns 0.
 */
static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
                      union pvrdma_cmd_resp *rsp)
{
    struct pvrdma_cmd_destroy_uc *cmd = &req->destroy_uc;

    pr_dbg("ctx_handle=%d\n", cmd->ctx_handle);
    rdma_rm_dealloc_uc(&dev->rdma_dev_res, cmd->ctx_handle);

    return 0;
}
|
||||
struct cmd_handler {
|
||||
uint32_t cmd;
|
||||
int (*exec)(PVRDMADev *dev, union pvrdma_cmd_req *req,
|
||||
union pvrdma_cmd_resp *rsp);
|
||||
};
|
||||
|
||||
static struct cmd_handler cmd_handlers[] = {
|
||||
{PVRDMA_CMD_QUERY_PORT, query_port},
|
||||
{PVRDMA_CMD_QUERY_PKEY, query_pkey},
|
||||
{PVRDMA_CMD_CREATE_PD, create_pd},
|
||||
{PVRDMA_CMD_DESTROY_PD, destroy_pd},
|
||||
{PVRDMA_CMD_CREATE_MR, create_mr},
|
||||
{PVRDMA_CMD_DESTROY_MR, destroy_mr},
|
||||
{PVRDMA_CMD_CREATE_CQ, create_cq},
|
||||
{PVRDMA_CMD_RESIZE_CQ, NULL},
|
||||
{PVRDMA_CMD_DESTROY_CQ, destroy_cq},
|
||||
{PVRDMA_CMD_CREATE_QP, create_qp},
|
||||
{PVRDMA_CMD_MODIFY_QP, modify_qp},
|
||||
{PVRDMA_CMD_QUERY_QP, NULL},
|
||||
{PVRDMA_CMD_DESTROY_QP, destroy_qp},
|
||||
{PVRDMA_CMD_CREATE_UC, create_uc},
|
||||
{PVRDMA_CMD_DESTROY_UC, destroy_uc},
|
||||
{PVRDMA_CMD_CREATE_BIND, create_bind},
|
||||
{PVRDMA_CMD_DESTROY_BIND, destroy_bind},
|
||||
};
|
||||
|
||||
/*
 * Execute the command currently placed in the device's request buffer.
 *
 * The handler's result (or 0xFFFF for an unknown or unimplemented command)
 * is written to PVRDMA_REG_ERR and the command-ring interrupt is posted in
 * every case.
 *
 * Returns 0 if the command succeeded, -EINVAL otherwise.
 */
int execute_command(PVRDMADev *dev)
{
    int err = 0xFFFF;
    DSRInfo *dsr_info;
    uint32_t cmd;

    dsr_info = &dev->dsr_info;

    /*
     * Read the command id exactly once.  NOTE(review): the request buffer
     * appears to be guest-visible memory, so a second read after the bounds
     * check could observe a different value - confirm how req is mapped.
     */
    cmd = dsr_info->req->hdr.cmd;

    pr_dbg("cmd=%d\n", cmd);
    if (cmd >= sizeof(cmd_handlers) / sizeof(struct cmd_handler)) {
        pr_dbg("Unsupported command\n");
        goto out;
    }

    if (!cmd_handlers[cmd].exec) {
        pr_dbg("Unsupported command (not implemented yet)\n");
        goto out;
    }

    err = cmd_handlers[cmd].exec(dev, dsr_info->req, dsr_info->rsp);

out:
    set_reg_val(dev, PVRDMA_REG_ERR, err);
    post_interrupt(dev, INTR_VEC_CMD_RING);

    return (err == 0) ? 0 : -EINVAL;
}
|
155
hw/rdma/vmw/pvrdma_dev_ring.c
Normal file
155
hw/rdma/vmw/pvrdma_dev_ring.c
Normal file
@ -0,0 +1,155 @@
|
||||
/*
|
||||
* QEMU paravirtual RDMA - Device rings
|
||||
*
|
||||
* Copyright (C) 2018 Oracle
|
||||
* Copyright (C) 2018 Red Hat Inc
|
||||
*
|
||||
* Authors:
|
||||
* Yuval Shaia <yuval.shaia@oracle.com>
|
||||
* Marcel Apfelbaum <marcel@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <qemu/osdep.h>
|
||||
#include <hw/pci/pci.h>
|
||||
#include <cpu.h>
|
||||
|
||||
#include "../rdma_utils.h"
|
||||
#include <standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h>
|
||||
#include "pvrdma_dev_ring.h"
|
||||
|
||||
/*
 * Initialise @ring and DMA-map the guest pages that back it.
 *
 * @ring:       ring to initialise
 * @name:       ring name, truncated to MAX_RING_NAME_SZ - 1 characters
 * @dev:        PCI device used for the DMA mappings
 * @ring_state: ring-state header holding the producer/consumer indices
 * @max_elems:  number of elements in the ring
 * @elem_sz:    size of one element in bytes
 * @tbl:        array of @npages DMA addresses, one per ring page
 * @npages:     number of ring pages
 *
 * Every mapped page is zeroed.  A zero address in @tbl is reported and
 * skipped; the matching slot in ring->pages stays NULL.
 *
 * Returns 0 on success or -ENOMEM if a page cannot be mapped, in which case
 * everything mapped so far is released and the ring owns no pages.
 */
int pvrdma_ring_init(PvrdmaRing *ring, const char *name, PCIDevice *dev,
                     struct pvrdma_ring *ring_state, uint32_t max_elems,
                     size_t elem_sz, dma_addr_t *tbl, dma_addr_t npages)
{
    int i;
    int rc = 0;

    strncpy(ring->name, name, MAX_RING_NAME_SZ);
    ring->name[MAX_RING_NAME_SZ - 1] = 0;
    pr_dbg("Initializing %s ring\n", ring->name);
    ring->dev = dev;
    ring->ring_state = ring_state;
    ring->max_elems = max_elems;
    ring->elem_sz = elem_sz;
    pr_dbg("ring->elem_sz=%ld\n", (long)ring->elem_sz);
    pr_dbg("npages=%ld\n", (long)npages);
    /*
     * TODO: decide whether the driver's settings should be reset here, i.e.
     * whether prod_tail and cons_head of the ring state should be zeroed.
     */
    ring->npages = npages;
    ring->pages = g_malloc(npages * sizeof(void *));
    /*
     * Start from all-NULL slots so that entries skipped below, and the
     * unmap loops here and in pvrdma_ring_free(), never see garbage.
     */
    if (ring->pages) {
        memset(ring->pages, 0, npages * sizeof(void *));
    }

    for (i = 0; i < npages; i++) {
        if (!tbl[i]) {
            pr_err("npages=%ld but tbl[%d] is NULL\n", (long)npages, i);
            continue;
        }

        ring->pages[i] = rdma_pci_dma_map(dev, tbl[i], TARGET_PAGE_SIZE);
        if (!ring->pages[i]) {
            rc = -ENOMEM;
            pr_dbg("Failed to map to page %d\n", i);
            goto out_free;
        }
        memset(ring->pages[i], 0, TARGET_PAGE_SIZE);
    }

    goto out;

out_free:
    while (i--) {
        if (ring->pages[i]) {
            rdma_pci_dma_unmap(dev, ring->pages[i], TARGET_PAGE_SIZE);
        }
    }
    g_free(ring->pages);
    /* Leave the ring in a state where pvrdma_ring_free() is a no-op. */
    ring->pages = NULL;
    ring->npages = 0;

out:
    return rc;
}
|
||||
|
||||
/*
 * Return a pointer to the next element available for reading, or NULL when
 * the ring holds no data.  The read position is not advanced; see
 * pvrdma_ring_read_inc().
 */
void *pvrdma_ring_next_elem_read(PvrdmaRing *ring)
{
    unsigned int elem_idx = 0;
    unsigned int byte_off;

    if (!pvrdma_idx_ring_has_data(ring->ring_state, ring->max_elems,
                                  &elem_idx)) {
        pr_dbg("No more data in ring\n");
        return NULL;
    }

    /* Locate the element: page holding it, then offset inside that page */
    byte_off = elem_idx * ring->elem_sz;

    return ring->pages[byte_off / TARGET_PAGE_SIZE] +
           (byte_off % TARGET_PAGE_SIZE);
}
|
||||
|
||||
/*
 * Advance the ring's consumer head by one element.
 */
void pvrdma_ring_read_inc(PvrdmaRing *ring)
{
    struct pvrdma_ring *state = ring->ring_state;

    pvrdma_idx_ring_inc(&state->cons_head, ring->max_elems);
}
|
||||
|
||||
/*
 * Return a pointer to the next free element for writing, or NULL when the
 * ring is full.  The write position is not advanced; see
 * pvrdma_ring_write_inc().
 */
void *pvrdma_ring_next_elem_write(PvrdmaRing *ring)
{
    unsigned int slot, byte_off, tail;

    if (!pvrdma_idx_ring_has_space(ring->ring_state, ring->max_elems, &tail)) {
        pr_dbg("CQ is full\n");
        return NULL;
    }

    /* TODO: tail == idx */
    slot = pvrdma_idx(&ring->ring_state->prod_tail, ring->max_elems);

    /* Locate the element: page holding it, then offset inside that page */
    byte_off = slot * ring->elem_sz;

    return ring->pages[byte_off / TARGET_PAGE_SIZE] +
           (byte_off % TARGET_PAGE_SIZE);
}
|
||||
|
||||
/*
 * Advance the ring's producer tail by one element.
 */
void pvrdma_ring_write_inc(PvrdmaRing *ring)
{
    struct pvrdma_ring *state = ring->ring_state;

    pvrdma_idx_ring_inc(&state->prod_tail, ring->max_elems);
}
|
||||
|
||||
/*
 * Unmap every page of @ring and free the page array.  The ring structure
 * and its ring-state mapping are left to the caller.  Does nothing for a
 * NULL ring or a ring without a page array.
 */
void pvrdma_ring_free(PvrdmaRing *ring)
{
    if (ring == NULL || ring->pages == NULL) {
        return;
    }

    pr_dbg("ring->npages=%d\n", ring->npages);

    /* Walk the page array backwards, consuming npages as we go */
    while (ring->npages--) {
        void *page = ring->pages[ring->npages];

        rdma_pci_dma_unmap(ring->dev, page, TARGET_PAGE_SIZE);
    }

    g_free(ring->pages);
    ring->pages = NULL;
}
|
42
hw/rdma/vmw/pvrdma_dev_ring.h
Normal file
42
hw/rdma/vmw/pvrdma_dev_ring.h
Normal file
@ -0,0 +1,42 @@
|
||||
/*
|
||||
* QEMU VMWARE paravirtual RDMA ring utilities
|
||||
*
|
||||
* Copyright (C) 2018 Oracle
|
||||
* Copyright (C) 2018 Red Hat Inc
|
||||
*
|
||||
* Authors:
|
||||
* Yuval Shaia <yuval.shaia@oracle.com>
|
||||
* Marcel Apfelbaum <marcel@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef PVRDMA_DEV_RING_H
|
||||
#define PVRDMA_DEV_RING_H
|
||||
|
||||
#include <qemu/typedefs.h>
|
||||
|
||||
#define MAX_RING_NAME_SZ 32
|
||||
|
||||
typedef struct PvrdmaRing {
|
||||
char name[MAX_RING_NAME_SZ];
|
||||
PCIDevice *dev;
|
||||
uint32_t max_elems;
|
||||
size_t elem_sz;
|
||||
struct pvrdma_ring *ring_state; /* used only for unmap */
|
||||
int npages;
|
||||
void **pages;
|
||||
} PvrdmaRing;
|
||||
|
||||
int pvrdma_ring_init(PvrdmaRing *ring, const char *name, PCIDevice *dev,
|
||||
struct pvrdma_ring *ring_state, uint32_t max_elems,
|
||||
size_t elem_sz, dma_addr_t *tbl, dma_addr_t npages);
|
||||
void *pvrdma_ring_next_elem_read(PvrdmaRing *ring);
|
||||
void pvrdma_ring_read_inc(PvrdmaRing *ring);
|
||||
void *pvrdma_ring_next_elem_write(PvrdmaRing *ring);
|
||||
void pvrdma_ring_write_inc(PvrdmaRing *ring);
|
||||
void pvrdma_ring_free(PvrdmaRing *ring);
|
||||
|
||||
#endif
|
222
hw/rdma/vmw/pvrdma_qp_ops.c
Normal file
222
hw/rdma/vmw/pvrdma_qp_ops.c
Normal file
@ -0,0 +1,222 @@
|
||||
/*
|
||||
* QEMU paravirtual RDMA - QP implementation
|
||||
*
|
||||
* Copyright (C) 2018 Oracle
|
||||
* Copyright (C) 2018 Red Hat Inc
|
||||
*
|
||||
* Authors:
|
||||
* Yuval Shaia <yuval.shaia@oracle.com>
|
||||
* Marcel Apfelbaum <marcel@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <qemu/osdep.h>
|
||||
|
||||
#include "../rdma_utils.h"
|
||||
#include "../rdma_rm.h"
|
||||
#include "../rdma_backend.h"
|
||||
|
||||
#include "pvrdma.h"
|
||||
#include <standard-headers/rdma/vmw_pvrdma-abi.h>
|
||||
#include "pvrdma_qp_ops.h"
|
||||
|
||||
typedef struct CompHandlerCtx {
|
||||
PVRDMADev *dev;
|
||||
uint32_t cq_handle;
|
||||
struct pvrdma_cqe cqe;
|
||||
} CompHandlerCtx;
|
||||
|
||||
/* Send Queue WQE */
|
||||
typedef struct PvrdmaSqWqe {
|
||||
struct pvrdma_sq_wqe_hdr hdr;
|
||||
struct pvrdma_sge sge[0];
|
||||
} PvrdmaSqWqe;
|
||||
|
||||
/* Recv Queue WQE */
|
||||
typedef struct PvrdmaRqWqe {
|
||||
struct pvrdma_rq_wqe_hdr hdr;
|
||||
struct pvrdma_sge sge[0];
|
||||
} PvrdmaRqWqe;
|
||||
|
||||
/*
|
||||
* 1. Put CQE on send CQ ring
|
||||
* 2. Put CQ number on dsr completion ring
|
||||
* 3. Interrupt host
|
||||
*/
|
||||
static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
|
||||
struct pvrdma_cqe *cqe)
|
||||
{
|
||||
struct pvrdma_cqe *cqe1;
|
||||
struct pvrdma_cqne *cqne;
|
||||
PvrdmaRing *ring;
|
||||
RdmaRmCQ *cq = rdma_rm_get_cq(&dev->rdma_dev_res, cq_handle);
|
||||
|
||||
if (unlikely(!cq)) {
|
||||
pr_dbg("Invalid cqn %d\n", cq_handle);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ring = (PvrdmaRing *)cq->opaque;
|
||||
pr_dbg("ring=%p\n", ring);
|
||||
|
||||
/* Step #1: Put CQE on CQ ring */
|
||||
pr_dbg("Writing CQE\n");
|
||||
cqe1 = pvrdma_ring_next_elem_write(ring);
|
||||
if (unlikely(!cqe1)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cqe1->wr_id = cqe->wr_id;
|
||||
cqe1->qp = cqe->qp;
|
||||
cqe1->opcode = cqe->opcode;
|
||||
cqe1->status = cqe->status;
|
||||
cqe1->vendor_err = cqe->vendor_err;
|
||||
|
||||
pvrdma_ring_write_inc(ring);
|
||||
|
||||
/* Step #2: Put CQ number on dsr completion ring */
|
||||
pr_dbg("Writing CQNE\n");
|
||||
cqne = pvrdma_ring_next_elem_write(&dev->dsr_info.cq);
|
||||
if (unlikely(!cqne)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cqne->info = cq_handle;
|
||||
pvrdma_ring_write_inc(&dev->dsr_info.cq);
|
||||
|
||||
pr_dbg("cq->notify=%d\n", cq->notify);
|
||||
if (cq->notify) {
|
||||
cq->notify = false;
|
||||
post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pvrdma_qp_ops_comp_handler(int status, unsigned int vendor_err,
|
||||
void *ctx)
|
||||
{
|
||||
CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx;
|
||||
|
||||
pr_dbg("cq_handle=%d\n", comp_ctx->cq_handle);
|
||||
pr_dbg("wr_id=%ld\n", comp_ctx->cqe.wr_id);
|
||||
pr_dbg("status=%d\n", status);
|
||||
pr_dbg("vendor_err=0x%x\n", vendor_err);
|
||||
comp_ctx->cqe.status = status;
|
||||
comp_ctx->cqe.vendor_err = vendor_err;
|
||||
pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe);
|
||||
g_free(ctx);
|
||||
}
|
||||
|
||||
/* Tear down QP ops: drop the completion handler from the backend. */
void pvrdma_qp_ops_fini(void)
{
    rdma_backend_unregister_comp_handler();
}
|
||||
|
||||
int pvrdma_qp_ops_init(void)
|
||||
{
|
||||
rdma_backend_register_comp_handler(pvrdma_qp_ops_comp_handler);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Drain the send ring of QP @qp_handle.
 *
 * The send ring is the first PvrdmaRing behind qp->opaque.  For each WQE
 * found on it a completion context is allocated (freed later by the
 * completion handler), the WQE is handed to the backend, and the ring's
 * read position is advanced.
 *
 * Returns 0, or -EINVAL when @qp_handle does not name a QP.
 */
int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    PvrdmaRing *ring;
    PvrdmaSqWqe *wqe;

    pr_dbg("qp_handle=%d\n", qp_handle);

    qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
    if (unlikely(qp == NULL)) {
        return -EINVAL;
    }

    ring = (PvrdmaRing *)qp->opaque;
    pr_dbg("sring=%p\n", ring);

    for (wqe = pvrdma_ring_next_elem_read(ring); wqe != NULL;
         wqe = pvrdma_ring_next_elem_read(ring)) {
        CompHandlerCtx *comp_ctx;

        pr_dbg("wr_id=%ld\n", wqe->hdr.wr_id);

        /* Prepare the CQE that will be posted once the backend completes */
        comp_ctx = g_malloc(sizeof(*comp_ctx));
        comp_ctx->dev = dev;
        comp_ctx->cq_handle = qp->send_cq_handle;
        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
        comp_ctx->cqe.qp = qp_handle;
        comp_ctx->cqe.opcode = wqe->hdr.opcode;

        rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp,
                               qp->qp_type,
                               (struct ibv_sge *)&wqe->sge[0],
                               wqe->hdr.num_sge,
                               (union ibv_gid *)wqe->hdr.wr.ud.av.dgid,
                               wqe->hdr.wr.ud.remote_qpn,
                               wqe->hdr.wr.ud.remote_qkey, comp_ctx);

        pvrdma_ring_read_inc(ring);
    }

    return 0;
}
|
||||
|
||||
/*
 * Drain the receive ring of QP @qp_handle.
 *
 * The receive ring is the second PvrdmaRing behind qp->opaque.  For each
 * WQE found on it a completion context is allocated (freed later by the
 * completion handler), the WQE is handed to the backend, and the ring's
 * read position is advanced.
 *
 * Returns 0, or -EINVAL when @qp_handle does not name a QP.
 */
int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    PvrdmaRqWqe *wqe;
    PvrdmaRing *ring;

    pr_dbg("qp_handle=%d\n", qp_handle);

    qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
    if (unlikely(!qp)) {
        return -EINVAL;
    }

    ring = &((PvrdmaRing *)qp->opaque)[1];
    pr_dbg("rring=%p\n", ring);

    wqe = pvrdma_ring_next_elem_read(ring);
    while (wqe) {
        CompHandlerCtx *comp_ctx;

        pr_dbg("wr_id=%ld\n", wqe->hdr.wr_id);

        /*
         * Prepare CQE.  Every member not named below is zeroed: the
         * completion path copies cqe.opcode into the guest-visible CQ
         * ring, and this function never assigns it, so it must not be
         * left holding uninitialised heap contents.
         * NOTE(review): a receive-specific opcode is probably what the
         * guest expects here rather than 0 - confirm against the ABI.
         */
        comp_ctx = g_malloc(sizeof(*comp_ctx));
        *comp_ctx = (CompHandlerCtx) {
            .dev = dev,
            .cq_handle = qp->recv_cq_handle,
            .cqe = {
                .wr_id = wqe->hdr.wr_id,
                .qp = qp_handle,
            },
        };

        rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res,
                               &qp->backend_qp, qp->qp_type,
                               (struct ibv_sge *)&wqe->sge[0],
                               wqe->hdr.num_sge, comp_ctx);

        pvrdma_ring_read_inc(ring);

        wqe = pvrdma_ring_next_elem_read(ring);
    }

    return 0;
}
|
||||
|
||||
/*
 * Ask the backend to poll the completion queue named by @cq_handle.
 *
 * An unknown handle is logged and otherwise ignored; the backend is only
 * called with a CQ that actually exists.
 */
void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *cq;

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        pr_dbg("Invalid CQ# %d\n", cq_handle);
        /* Nothing to poll; going on would dereference the NULL cq */
        return;
    }

    rdma_backend_poll_cq(dev_res, &cq->backend_cq);
}
|
27
hw/rdma/vmw/pvrdma_qp_ops.h
Normal file
27
hw/rdma/vmw/pvrdma_qp_ops.h
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* QEMU VMWARE paravirtual RDMA QP Operations
|
||||
*
|
||||
* Copyright (C) 2018 Oracle
|
||||
* Copyright (C) 2018 Red Hat Inc
|
||||
*
|
||||
* Authors:
|
||||
* Yuval Shaia <yuval.shaia@oracle.com>
|
||||
* Marcel Apfelbaum <marcel@redhat.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
* See the COPYING file in the top-level directory.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef PVRDMA_QP_H
|
||||
#define PVRDMA_QP_H
|
||||
|
||||
#include "pvrdma.h"
|
||||
|
||||
int pvrdma_qp_ops_init(void);
|
||||
void pvrdma_qp_ops_fini(void);
|
||||
int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle);
|
||||
int pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle);
|
||||
void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle);
|
||||
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user