nvme: Add mechanism to override RDMA pd/mr behavior

Add a mechanism to modify the RDMA transport's behavior
when creating protection domains and registering memory.
This is entirely optional.

Change-Id: I7cd850e76a673bf5521ca4815b779c53ab9567e8
Signed-off-by: zkhatami88 <z.khatami88@gmail.com>
Reviewed-on: https://review.gerrithub.io/421415
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Commit 9fb6947617 (parent 161af0b5cb)
Authored by zkhatami88 on 2018-08-06 16:27:13 -07:00; committed by Ben Walker
2 changed files with 140 additions and 28 deletions

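The intended usage, sketched below, is for the application to fill out a struct spdk_nvme_rdma_hooks and install it once, before probing for any controllers. The callback names (my_get_ctx, my_get_ibv_pd, my_get_rkey) are illustrative only, not part of SPDK; possible bodies for them are sketched after the header diff.

static struct spdk_nvme_rdma_hooks my_hooks = {
	.get_ctx	= my_get_ctx,	/* illustrative callbacks, defined by the application */
	.get_ibv_pd	= my_get_ibv_pd,
	.get_rkey	= my_get_rkey,
};

/* Must be called before spdk_nvme_probe(); the hooks are copied into
 * each RDMA controller when it is constructed. */
spdk_nvme_rdma_init_hooks(&my_hooks);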
diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h
--- a/include/spdk/nvme.h
+++ b/include/spdk/nvme.h

@@ -44,6 +44,7 @@
 extern "C" {
 #endif
 
+#include "spdk/config.h"
 #include "spdk/env.h"
 #include "spdk/nvme_spec.h"
 #include "spdk/nvmf_spec.h"
@@ -2038,6 +2039,60 @@ void spdk_nvme_qpair_remove_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr,
 		struct spdk_nvme_qpair *qpair,
 		uint8_t opc);
 
+#ifdef SPDK_CONFIG_RDMA
+struct ibv_context;
+struct ibv_pd;
+struct ibv_mr;
+
+/**
+ * RDMA Transport Hooks
+ */
+struct spdk_nvme_rdma_hooks {
+	/**
+	 * \brief Get a transport id specific context to be passed to
+	 * the other hooks.
+	 *
+	 * \param trid the transport id
+	 *
+	 * \return ctx to be passed to the other hooks
+	 */
+	void *(*get_ctx)(const struct spdk_nvme_transport_id *trid);
+
+	/**
+	 * \brief Get an InfiniBand Verbs protection domain.
+	 *
+	 * \param ctx Context returned from get_ctx.
+	 * \param verbs InfiniBand Verbs context
+	 *
+	 * \return pd of the nvme ctrlr
+	 */
+	struct ibv_pd *(*get_ibv_pd)(void *ctx, struct ibv_context *verbs);
+
+	/**
+	 * \brief Get an InfiniBand Verbs memory region for a buffer.
+	 *
+	 * \param ctx Context returned from get_ctx.
+	 * \param buf Memory buffer for which an rkey should be returned.
+	 * \param size size of buf
+	 *
+	 * \return InfiniBand remote key (rkey) for this buf
+	 */
+	uint64_t (*get_rkey)(void *ctx, void *buf, size_t size);
+};
+
+/**
+ * \brief Set the global hooks for the RDMA transport, if necessary.
+ *
+ * This call is optional and must be performed prior to probing for
+ * any devices. By default, the RDMA transport will use the ibverbs
+ * library to create protection domains and register memory. This
+ * is a mechanism to override that and use an existing registration.
+ *
+ * \param hooks The hooks to install as the global RDMA hooks.
+ */
+void spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks);
+#endif
+
 #ifdef __cplusplus
 }
 #endif

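To make the new header concrete, here is a hedged sketch of what hook implementations might look like, assuming the application already owns a protection domain and has pre-registered a single memory region covering all of its I/O buffers. struct my_app_state and the my_* functions are hypothetical names, not part of SPDK.

#include <assert.h>
#include <stdint.h>
#include <infiniband/verbs.h>

#include "spdk/nvme.h"

struct my_app_state {
	struct ibv_pd *pd;	/* protection domain shared with the application */
	struct ibv_mr *mr;	/* pre-registered region covering the I/O buffers */
};

static struct my_app_state g_state;

static void *
my_get_ctx(const struct spdk_nvme_transport_id *trid)
{
	/* Single global state here; a real application might key off trid. */
	return &g_state;
}

static struct ibv_pd *
my_get_ibv_pd(void *ctx, struct ibv_context *verbs)
{
	struct my_app_state *state = ctx;

	/* Hand back the existing PD, creating it on first use. */
	if (state->pd == NULL) {
		state->pd = ibv_alloc_pd(verbs);
	}
	return state->pd;
}

static uint64_t
my_get_rkey(void *ctx, void *buf, size_t size)
{
	struct my_app_state *state = ctx;

	/* The buffer must lie entirely within the pre-registered region. */
	assert((uintptr_t)buf >= (uintptr_t)state->mr->addr &&
	       (uintptr_t)buf + size <= (uintptr_t)state->mr->addr + state->mr->length);
	return state->mr->rkey;
}

Note that when get_rkey is set, the transport (see nvme_rdma_mr_map_notify below) stores the returned rkey directly in the memory map and never calls ibv_reg_mr()/ibv_dereg_mr(), so the region really must already be registered against the PD returned by get_ibv_pd.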
diff --git a/lib/nvme/nvme_rdma.c b/lib/nvme/nvme_rdma.c
--- a/lib/nvme/nvme_rdma.c
+++ b/lib/nvme/nvme_rdma.c

@@ -71,6 +71,8 @@ struct spdk_nvmf_cmd {
 	struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS];
 };
 
+struct spdk_nvme_rdma_hooks g_nvme_hooks = {};
+
 /* Mapping from virtual address to ibv_mr pointer for a protection domain */
 struct spdk_nvme_rdma_mr_map {
 	struct ibv_pd *pd;
@@ -82,6 +84,10 @@ struct spdk_nvme_rdma_mr_map {
 /* NVMe RDMA transport extensions for spdk_nvme_ctrlr */
 struct nvme_rdma_ctrlr {
 	struct spdk_nvme_ctrlr ctrlr;
+
+	struct spdk_nvme_rdma_hooks hooks;
+	void *hook_ctx;
+	struct ibv_pd *pd;
 };
 
 /* NVMe RDMA qpair extensions for spdk_nvme_qpair */
@@ -241,6 +247,7 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
 {
 	int rc;
 	struct ibv_qp_init_attr attr;
+	struct nvme_rdma_ctrlr *rctrlr;
 
 	rqpair->cq = ibv_create_cq(rqpair->cm_id->verbs, rqpair->num_entries * 2, rqpair, NULL, 0);
 	if (!rqpair->cq) {
@@ -248,6 +255,13 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
 		return -1;
 	}
 
+	rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
+	if (rctrlr->hooks.get_ibv_pd) {
+		rctrlr->pd = rctrlr->hooks.get_ibv_pd(rctrlr->hook_ctx, rqpair->cm_id->verbs);
+	} else {
+		rctrlr->pd = NULL;
+	}
+
 	memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
 	attr.qp_type = IBV_QPT_RC;
 	attr.send_cq = rqpair->cq;
@@ -257,11 +271,12 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
 	attr.cap.max_send_sge = NVME_RDMA_DEFAULT_TX_SGE;
 	attr.cap.max_recv_sge = NVME_RDMA_DEFAULT_RX_SGE;
 
-	rc = rdma_create_qp(rqpair->cm_id, NULL, &attr);
+	rc = rdma_create_qp(rqpair->cm_id, rctrlr->pd, &attr);
 	if (rc) {
 		SPDK_ERRLOG("rdma_create_qp failed\n");
 		return -1;
 	}
+	rctrlr->pd = rqpair->cm_id->qp->pd;
 
 	rqpair->cm_id->context = &rqpair->qpair;
@@ -611,29 +626,38 @@ nvme_rdma_mr_map_notify(void *cb_ctx, struct spdk_mem_map *map,
 		   enum spdk_mem_map_notify_action action,
 		   void *vaddr, size_t size)
 {
-	struct ibv_pd *pd = cb_ctx;
+	struct nvme_rdma_ctrlr *rctrlr = cb_ctx;
+	struct ibv_pd *pd;
 	struct ibv_mr *mr;
 	int rc;
 
 	switch (action) {
 	case SPDK_MEM_MAP_NOTIFY_REGISTER:
-		mr = ibv_reg_mr(pd, vaddr, size,
-				IBV_ACCESS_LOCAL_WRITE |
-				IBV_ACCESS_REMOTE_READ |
-				IBV_ACCESS_REMOTE_WRITE);
-		if (mr == NULL) {
-			SPDK_ERRLOG("ibv_reg_mr() failed\n");
-			return -EFAULT;
+		if (!rctrlr->hooks.get_rkey) {
+			pd = rctrlr->pd;
+			mr = ibv_reg_mr(pd, vaddr, size,
+					IBV_ACCESS_LOCAL_WRITE |
+					IBV_ACCESS_REMOTE_READ |
+					IBV_ACCESS_REMOTE_WRITE);
+			if (mr == NULL) {
+				SPDK_ERRLOG("ibv_reg_mr() failed\n");
+				return -EFAULT;
+			} else {
+				rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
+			}
 		} else {
-			rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
+			rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
+							  rctrlr->hooks.get_rkey(rctrlr->hook_ctx, vaddr, size));
 		}
 		break;
 	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
-		mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
-		rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
-		if (mr) {
-			ibv_dereg_mr(mr);
+		if (!rctrlr->hooks.get_rkey) {
+			mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
+			if (mr) {
+				ibv_dereg_mr(mr);
+			}
 		}
+		rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
 		break;
 	default:
 		SPDK_UNREACHABLE();
@@ -673,7 +697,8 @@ nvme_rdma_register_mem(struct nvme_rdma_qpair *rqpair)
 	mr_map->ref = 1;
 	mr_map->pd = pd;
 
-	mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops, pd);
+	mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops,
+					 nvme_rdma_ctrlr(rqpair->qpair.ctrlr));
 	if (mr_map->map == NULL) {
 		SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
 		free(mr_map);
@@ -918,9 +943,21 @@ nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
 	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
 
 	requested_size = req->payload_size;
-	mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload,
-			&requested_size);
-	if (mr == NULL || requested_size < req->payload_size) {
+
+	if (!nvme_rdma_ctrlr(rqpair->qpair.ctrlr)->hooks.get_rkey) {
+		mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload,
+				&requested_size);
+		if (mr == NULL) {
+			return -1;
+		}
+		req->cmd.dptr.sgl1.keyed.key = mr->rkey;
+	} else {
+		req->cmd.dptr.sgl1.keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
+				(uint64_t)payload,
+				&requested_size);
+	}
+
+	if (requested_size < req->payload_size) {
 		return -1;
 	}
@@ -937,7 +974,6 @@ nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
 	req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
 	req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
 	req->cmd.dptr.sgl1.keyed.length = req->payload_size;
-	req->cmd.dptr.sgl1.keyed.key = mr->rkey;
 	req->cmd.dptr.sgl1.address = (uint64_t)payload;
 
 	return 0;
@@ -977,17 +1013,27 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
 		sge_length = spdk_min(remaining_size, sge_length);
 		mr_length = sge_length;
 
-		mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)virt_addr,
-				&mr_length);
+		if (!nvme_rdma_ctrlr(rqpair->qpair.ctrlr)->hooks.get_rkey) {
+			mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map,
+					(uint64_t)virt_addr,
+					&mr_length);
+			if (mr == NULL) {
+				return -1;
+			}
+			cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
+		} else {
+			cmd->sgl[num_sgl_desc].keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
+							   (uint64_t)virt_addr,
+							   &mr_length);
+		}
 
-		if (mr == NULL || mr_length < sge_length) {
+		if (mr_length < sge_length) {
 			return -1;
 		}
 
 		cmd->sgl[num_sgl_desc].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
 		cmd->sgl[num_sgl_desc].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
 		cmd->sgl[num_sgl_desc].keyed.length = sge_length;
-		cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
 		cmd->sgl[num_sgl_desc].address = (uint64_t)virt_addr;
 
 		remaining_size -= sge_length;
@@ -1017,11 +1063,11 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
 		 * the NVMe command. */
 		rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
 
-		req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
-		req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
-		req->cmd.dptr.sgl1.keyed.length = req->payload_size;
-		req->cmd.dptr.sgl1.keyed.key = mr->rkey;
-		req->cmd.dptr.sgl1.address = rqpair->cmds[rdma_req->id].sgl[0].address;
+		req->cmd.dptr.sgl1.keyed.type = cmd->sgl[0].keyed.type;
+		req->cmd.dptr.sgl1.keyed.subtype = cmd->sgl[0].keyed.subtype;
+		req->cmd.dptr.sgl1.keyed.length = cmd->sgl[0].keyed.length;
+		req->cmd.dptr.sgl1.keyed.key = cmd->sgl[0].keyed.key;
+		req->cmd.dptr.sgl1.address = cmd->sgl[0].address;
 	} else {
 		/*
 		 * Otherwise, the SGL descriptor embedded in the command must point to the list of
@@ -1363,6 +1409,11 @@ struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
 	nvme_ctrlr_init_cap(&rctrlr->ctrlr, &cap, &vs);
 
+	if (g_nvme_hooks.get_ctx) {
+		rctrlr->hooks = g_nvme_hooks;
+		rctrlr->hook_ctx = rctrlr->hooks.get_ctx(&rctrlr->ctrlr.trid);
+	}
+
 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "successfully initialized the nvmf ctrlr\n");
 
 	return &rctrlr->ctrlr;
 }
@ -1632,3 +1683,9 @@ nvme_rdma_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, siz
{ {
return 0; return 0;
} }
void
spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks)
{
g_nvme_hooks = *hooks;
}