nvme/rdma: eliminate bounce buffer copy
Register all spdk_malloc() memory regions as ibv_mr in a spdk_mem_map so we can
look up the RDMA key for the user's buffer and pass it in the SGL directly,
rather than copying through a pre-registered bounce buffer.

Change-Id: I7340bc2020b5256750c95dbd24ba67961404e5e7
Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
parent: ee2eda24bc
commit: 83e556534b
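For orientation, the core pattern this change introduces is: translate the payload's
virtual address to the ibv_mr registered for that memory region, then place the MR's
rkey and the buffer address directly into a keyed SGL descriptor. Below is a minimal
sketch of that lookup, assuming the spdk_mem_map/ibv_mr usage and spdk_nvme_cmd fields
that appear in the diff; the helper name and signature are illustrative only and are
not part of the commit.

	/* Hypothetical helper mirroring nvme_rdma_build_contig_request() below:
	 * look up the ibv_mr covering `payload` in the qpair's mem map and
	 * describe the buffer with a keyed SGL. Assumes the two-argument
	 * spdk_mem_map_translate() used elsewhere in this diff and the headers
	 * already included by nvme_rdma.c. */
	static int
	nvme_rdma_fill_keyed_sgl(struct spdk_mem_map *mr_map, struct spdk_nvme_cmd *cmd,
				 void *payload, uint32_t length)
	{
		struct ibv_mr *mr;

		/* The map was populated by the REGISTER notify callback, so any
		 * buffer allocated from registered memory translates to an MR. */
		mr = (struct ibv_mr *)spdk_mem_map_translate(mr_map, (uint64_t)payload);
		if (mr == NULL) {
			return -1; /* buffer is not in registered memory */
		}

		cmd->psdt = SPDK_NVME_PSDT_SGL_MPTR_SGL;
		cmd->dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
		cmd->dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
		cmd->dptr.sgl1.keyed.length = length;
		cmd->dptr.sgl1.keyed.key = mr->rkey;	/* remote key the target uses for RDMA */
		cmd->dptr.sgl1.address = (uint64_t)payload;

		return 0;
	}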
@@ -109,6 +109,9 @@ struct nvme_rdma_qpair {
	/* Memory region describing all cmds for this qpair */
	struct ibv_mr *cmd_mr;

	/* Mapping from virtual address to ibv_mr pointer */
	struct spdk_mem_map *mr_map;

	STAILQ_HEAD(, spdk_nvme_rdma_req) free_reqs;
};

@@ -119,17 +122,8 @@ struct spdk_nvme_rdma_req {
	struct nvme_request *req;

	enum spdk_nvme_data_transfer xfer;

	struct ibv_sge send_sgl;

	struct ibv_mr *bb_mr;

	/* Cached value of bb_mr->rkey */
	uint32_t bb_rkey;

	uint8_t *bb;

	STAILQ_ENTRY(spdk_nvme_rdma_req) link;
};

@@ -335,25 +329,10 @@ fail:
static void
nvme_rdma_free_reqs(struct nvme_rdma_qpair *rqpair)
{
	struct spdk_nvme_rdma_req *rdma_req;
	int i;

	if (!rqpair->rdma_reqs) {
		return;
	}

	for (i = 0; i < rqpair->num_entries; i++) {
		rdma_req = &rqpair->rdma_reqs[i];

		if (rdma_req->bb_mr && ibv_dereg_mr(rdma_req->bb_mr)) {
			SPDK_ERRLOG("Unable to de-register bb_mr\n");
		}

		if (rdma_req->bb) {
			spdk_free(rdma_req->bb);
		}
	}

	if (rqpair->cmd_mr && rdma_dereg_mr(rqpair->cmd_mr)) {
		SPDK_ERRLOG("Unable to de-register cmd_mr\n");
	}
@@ -404,23 +383,6 @@ nvme_rdma_alloc_reqs(struct nvme_rdma_qpair *rqpair)
		rdma_req->send_sgl.length = sizeof(*cmd);
		rdma_req->send_sgl.lkey = rqpair->cmd_mr->lkey;

		rdma_req->bb = spdk_zmalloc(NVME_RDMA_RW_BUFFER_SIZE, 64, NULL);
		if (!rdma_req->bb) {
			SPDK_ERRLOG("Unable to register allocate read/write buffer\n");
			goto fail;
		}

		rdma_req->bb_mr = ibv_reg_mr(rqpair->cm_id->qp->pd, rdma_req->bb, NVME_RDMA_RW_BUFFER_SIZE,
					     IBV_ACCESS_LOCAL_WRITE |
					     IBV_ACCESS_REMOTE_READ |
					     IBV_ACCESS_REMOTE_WRITE);
		if (!rdma_req->bb_mr) {
			SPDK_ERRLOG("Unable to register bb_mr\n");
			goto fail;
		}

		rdma_req->bb_rkey = rdma_req->bb_mr->rkey;

		rdma_req->send_wr.wr_id = (uint64_t)rdma_req;
		rdma_req->send_wr.next = NULL;
		rdma_req->send_wr.opcode = IBV_WR_SEND;
@@ -439,72 +401,6 @@ fail:
	return -ENOMEM;
}

static int
nvme_rdma_copy_mem(struct spdk_nvme_rdma_req *rdma_req, bool copy_from_user)
{
	int rc;
	uint32_t remaining_transfer_len, len, offset = 0;
	void *addr, *src, *dst;
	struct spdk_nvme_sgl_descriptor *nvme_sgl;
	struct nvme_request *req = rdma_req->req;

	if (!req->payload_size) {
		return 0;
	}

	nvme_sgl = &req->cmd.dptr.sgl1;
	if (req->payload.type == NVME_PAYLOAD_TYPE_CONTIG) {
		addr = (void *)((uint64_t)req->payload.u.contig + req->payload_offset);
		if (!addr) {
			return -1;
		}

		len = req->payload_size;
		if (copy_from_user) {
			src = addr;
			dst = (void *)nvme_sgl->address;
		} else {
			src = (void *)nvme_sgl->address;
			dst = addr;
		}
		memcpy(dst, src, len);

	} else {
		if (!req->payload.u.sgl.reset_sgl_fn ||
		    !req->payload.u.sgl.next_sge_fn) {
			return -1;
		}

		req->payload.u.sgl.reset_sgl_fn(req->payload.u.sgl.cb_arg, req->payload_offset);
		remaining_transfer_len = req->payload_size;

		while (remaining_transfer_len > 0) {
			rc = req->payload.u.sgl.next_sge_fn(req->payload.u.sgl.cb_arg,
							    &addr, &len);
			if (rc || !addr) {
				SPDK_ERRLOG("Invalid address returned from user next_sge_fn callback\n");
				return -1;
			}

			len = spdk_min(remaining_transfer_len, len);
			remaining_transfer_len -= len;

			if (copy_from_user) {
				src = addr;
				dst = (void *)nvme_sgl->address + offset;
			} else {
				src = (void *)nvme_sgl->address + offset;
				dst = addr;
			}
			memcpy(dst, src, len);

			offset += len;
		}
	}

	return 0;
}

static int
nvme_rdma_recv(struct nvme_rdma_qpair *rqpair, uint64_t rsp_idx)
{
@@ -517,17 +413,9 @@ nvme_rdma_recv(struct nvme_rdma_qpair *rqpair, uint64_t rsp_idx)
	rsp = &rqpair->rsps[rsp_idx];
	rdma_req = &rqpair->rdma_reqs[rsp->cid];

	if (rdma_req->xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
		if (nvme_rdma_copy_mem(rdma_req, false) < 0) {
			SPDK_ERRLOG("Failed to copy to user memory\n");
			goto done;
		}
	}

	req = rdma_req->req;
	nvme_rdma_req_complete(req, rsp);

done:
	nvme_rdma_req_put(rqpair, rdma_req);
	if (nvme_rdma_post_recv(rqpair, rsp_idx)) {
		SPDK_ERRLOG("Unable to re-post rx descriptor\n");
@@ -745,6 +633,62 @@ ret:
	return rc;
}

static void
nvme_rdma_mr_map_notify(void *cb_ctx, struct spdk_mem_map *map,
			enum spdk_mem_map_notify_action action,
			void *vaddr, size_t size)
{
	struct ibv_pd *pd = cb_ctx;
	struct ibv_mr *mr;

	switch (action) {
	case SPDK_MEM_MAP_NOTIFY_REGISTER:
		mr = ibv_reg_mr(pd, vaddr, size,
				IBV_ACCESS_LOCAL_WRITE |
				IBV_ACCESS_REMOTE_READ |
				IBV_ACCESS_REMOTE_WRITE);
		if (mr == NULL) {
			SPDK_ERRLOG("ibv_reg_mr() failed\n");
		} else {
			spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
		}
		break;
	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
		mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr);
		spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
		if (mr) {
			ibv_dereg_mr(mr);
		}
		break;
	}
}


static int
nvme_rdma_register_mem(struct nvme_rdma_qpair *rqpair)
{
	struct ibv_pd *pd = rqpair->cm_id->qp->pd;
	struct spdk_mem_map *mr_map;

	// TODO: look up existing mem map registration for this pd

	mr_map = spdk_mem_map_alloc((uint64_t)NULL, nvme_rdma_mr_map_notify, pd);
	if (mr_map == NULL) {
		SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
		return -1;
	}

	rqpair->mr_map = mr_map;

	return 0;
}

static void
nvme_rdma_unregister_mem(struct nvme_rdma_qpair *rqpair)
{
	spdk_mem_map_free(&rqpair->mr_map);
}

static int
nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
{
@@ -824,6 +768,12 @@ nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
	}
	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "RDMA responses allocated\n");

	rc = nvme_rdma_register_mem(rqpair);
	if (rc < 0) {
		SPDK_ERRLOG("Unable to register memory for RDMA\n");
		return -1;
	}

	rc = nvme_rdma_qpair_fabric_connect(rqpair);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
@@ -833,40 +783,93 @@ nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
	return 0;
}

/**
 * Build SGL list describing scattered payload buffer.
/*
 * Build SGL describing empty payload.
 */
static int
nvme_rdma_build_sgl_request(struct spdk_nvme_rdma_req *rdma_req)
nvme_rdma_build_null_request(struct nvme_request *req)
{
	struct spdk_nvme_sgl_descriptor *nvme_sgl;
	struct nvme_request *req = rdma_req->req;

	if (req->payload_size > rdma_req->bb_mr->length) {
		return -1;
	}
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_SGL;

	if ((req->payload.type != NVME_PAYLOAD_TYPE_CONTIG) &&
	    (req->payload.type != NVME_PAYLOAD_TYPE_SGL)) {
		return -1;
	}

	/* setup the RDMA SGL details */
	nvme_sgl = &req->cmd.dptr.sgl1;
	nvme_sgl->keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	nvme_sgl->keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
	nvme_sgl->keyed.length = req->payload_size;
	nvme_sgl->keyed.key = rdma_req->bb_rkey;
	nvme_sgl->address = (uint64_t)rdma_req->bb;
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_SGL;
	nvme_sgl->keyed.length = 0;
	nvme_sgl->keyed.key = 0;
	nvme_sgl->address = 0;

	if (rdma_req->xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		if (nvme_rdma_copy_mem(rdma_req, true) < 0) {
			SPDK_ERRLOG("Failed to copy from user memory\n");
	return 0;
		}

/*
 * Build SGL describing contiguous payload buffer.
 */
static int
nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair, struct nvme_request *req)
{
	void *payload = req->payload.u.contig + req->payload_offset;
	struct ibv_mr *mr;

	assert(req->payload_size != 0);
	assert(req->payload.type == NVME_PAYLOAD_TYPE_CONTIG);

	mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map, (uint64_t)payload);
	if (mr == NULL) {
		return -1;
	}

	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_SGL;
	req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
	req->cmd.dptr.sgl1.keyed.length = req->payload_size;
	req->cmd.dptr.sgl1.keyed.key = mr->rkey;
	req->cmd.dptr.sgl1.address = (uint64_t)payload;

	return 0;
}

/*
 * Build SGL describing scattered payload buffer.
 */
static int
nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair, struct nvme_request *req)
{
	int rc;
	void *virt_addr;
	struct ibv_mr *mr;
	uint32_t length;

	assert(req->payload_size != 0);
	assert(req->payload.type == NVME_PAYLOAD_TYPE_SGL);
	assert(req->payload.u.sgl.reset_sgl_fn != NULL);
	assert(req->payload.u.sgl.next_sge_fn != NULL);
	req->payload.u.sgl.reset_sgl_fn(req->payload.u.sgl.cb_arg, req->payload_offset);

	/* TODO: for now, we only support a single SGL entry */
	rc = req->payload.u.sgl.next_sge_fn(req->payload.u.sgl.cb_arg, &virt_addr, &length);
	if (rc) {
		return -1;
	}

	if (length != req->payload_size) {
		SPDK_ERRLOG("multi-element SGL currently not supported for RDMA\n");
		return -1;
	}

	mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map, (uint64_t)virt_addr);
	if (mr == NULL) {
		return -1;
	}

	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_SGL;
	req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
	req->cmd.dptr.sgl1.keyed.length = length;
	req->cmd.dptr.sgl1.keyed.key = mr->rkey;
	req->cmd.dptr.sgl1.address = (uint64_t)virt_addr;

	return 0;
}
@@ -874,25 +877,23 @@ static int
nvme_rdma_req_init(struct nvme_rdma_qpair *rqpair, struct nvme_request *req,
		   struct spdk_nvme_rdma_req *rdma_req)
{
	int rc;

	rdma_req->req = req;
	req->cmd.cid = rdma_req->id;

	if (req->cmd.opc == SPDK_NVME_OPC_FABRIC) {
		struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd;
		rdma_req->xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype);
	if (req->payload_size == 0) {
		rc = nvme_rdma_build_null_request(req);
	} else if (req->payload.type == NVME_PAYLOAD_TYPE_CONTIG) {
		rc = nvme_rdma_build_contig_request(rqpair, req);
	} else if (req->payload.type == NVME_PAYLOAD_TYPE_SGL) {
		rc = nvme_rdma_build_sgl_request(rqpair, req);
	} else {
		rdma_req->xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
		rc = -1;
	}

	/* We do not support bi-directional transfer yet */
	if (rdma_req->xfer == SPDK_NVME_DATA_BIDIRECTIONAL) {
		SPDK_ERRLOG("Do not support bi-directional data transfer\n");
		return -1;
	}

	if (nvme_rdma_build_sgl_request(rdma_req) < 0) {
		return -1;
	if (rc) {
		return rc;
	}

	memcpy(&rqpair->cmds[rdma_req->id], &req->cmd, sizeof(req->cmd));
@@ -1021,6 +1022,7 @@ nvme_rdma_qpair_destroy(struct spdk_nvme_qpair *qpair)
	rqpair = nvme_rdma_qpair(qpair);

	nvme_rdma_unregister_mem(rqpair);
	nvme_rdma_free_reqs(rqpair);
	nvme_rdma_free_rsps(rqpair);
