nvme: Add mechanism to override RDMA pd/mr behavior
Add a mechanism to modify the RDMA transport's behavior when creating protection domains and registering memory. This is entirely optional.

Change-Id: I7cd850e76a673bf5521ca4815b779c53ab9567e8
Signed-off-by: zkhatami88 <z.khatami88@gmail.com>
Reviewed-on: https://review.gerrithub.io/421415
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
161af0b5cb
commit
9fb6947617
@ -44,6 +44,7 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "spdk/config.h"
|
||||||
#include "spdk/env.h"
|
#include "spdk/env.h"
|
||||||
#include "spdk/nvme_spec.h"
|
#include "spdk/nvme_spec.h"
|
||||||
#include "spdk/nvmf_spec.h"
|
#include "spdk/nvmf_spec.h"
|
||||||
@ -2038,6 +2039,60 @@ void spdk_nvme_qpair_remove_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr,
|
|||||||
struct spdk_nvme_qpair *qpair,
|
struct spdk_nvme_qpair *qpair,
|
||||||
uint8_t opc);
|
uint8_t opc);
|
||||||
|
|
||||||
|
#ifdef SPDK_CONFIG_RDMA
|
||||||
|
struct ibv_context;
|
||||||
|
struct ibv_pd;
|
||||||
|
struct ibv_mr;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RDMA Transport Hooks
|
||||||
|
*/
|
||||||
|
struct spdk_nvme_rdma_hooks {
|
||||||
|
/**
|
||||||
|
* \brief Get a transport id specific context to be passed to
|
||||||
|
* the other hooks.
|
||||||
|
*
|
||||||
|
* \param trid the transport id
|
||||||
|
*
|
||||||
|
* \return ctx to be passed to the other hooks
|
||||||
|
*/
|
||||||
|
void *(*get_ctx)(const struct spdk_nvme_transport_id *trid);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Get an InfiniBand Verbs protection domain.
|
||||||
|
*
|
||||||
|
* \param ctx Context returned from get_hook_ctx.
|
||||||
|
* \param verbs Infiniband verbs context
|
||||||
|
*
|
||||||
|
* \return pd of the nvme ctrlr
|
||||||
|
*/
|
||||||
|
struct ibv_pd *(*get_ibv_pd)(void *ctx, struct ibv_context *verbs);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Get an InfiniBand Verbs memory region for a buffer.
|
||||||
|
*
|
||||||
|
* \param ctx Context returned from get_hook_ctx.
|
||||||
|
* \param buf Memory buffer for which an rkey should be returned.
|
||||||
|
* \param size size of buf
|
||||||
|
*
|
||||||
|
* \return Infiniband remote key (rkey) for this buf
|
||||||
|
*/
|
||||||
|
uint64_t (*get_rkey)(void *ctx, void *buf, size_t size);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Set the global hooks for the RDMA transport, if necessary.
|
||||||
|
*
|
||||||
|
* This call is optional and must be performed prior to probing for
|
||||||
|
* any devices. By default, the RDMA transport will use the ibverbs
|
||||||
|
* library to create protection domains and register memory. This
|
||||||
|
* is a mechanism to subvert that and use an existing registration.
|
||||||
|
*
|
||||||
|
* \param hooks for initializing global hooks
|
||||||
|
*/
|
||||||
|
void spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
@ -71,6 +71,8 @@ struct spdk_nvmf_cmd {
|
|||||||
struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS];
|
struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct spdk_nvme_rdma_hooks g_nvme_hooks = {};
|
||||||
|
|
||||||
/* Mapping from virtual address to ibv_mr pointer for a protection domain */
|
/* Mapping from virtual address to ibv_mr pointer for a protection domain */
|
||||||
struct spdk_nvme_rdma_mr_map {
|
struct spdk_nvme_rdma_mr_map {
|
||||||
struct ibv_pd *pd;
|
struct ibv_pd *pd;
|
||||||
@ -82,6 +84,10 @@ struct spdk_nvme_rdma_mr_map {
|
|||||||
/* NVMe RDMA transport extensions for spdk_nvme_ctrlr */
|
/* NVMe RDMA transport extensions for spdk_nvme_ctrlr */
|
||||||
struct nvme_rdma_ctrlr {
|
struct nvme_rdma_ctrlr {
|
||||||
struct spdk_nvme_ctrlr ctrlr;
|
struct spdk_nvme_ctrlr ctrlr;
|
||||||
|
|
||||||
|
struct spdk_nvme_rdma_hooks hooks;
|
||||||
|
void *hook_ctx;
|
||||||
|
struct ibv_pd *pd;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* NVMe RDMA qpair extensions for spdk_nvme_qpair */
|
/* NVMe RDMA qpair extensions for spdk_nvme_qpair */
|
||||||
@ -241,6 +247,7 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
|
|||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
struct ibv_qp_init_attr attr;
|
struct ibv_qp_init_attr attr;
|
||||||
|
struct nvme_rdma_ctrlr *rctrlr;
|
||||||
|
|
||||||
rqpair->cq = ibv_create_cq(rqpair->cm_id->verbs, rqpair->num_entries * 2, rqpair, NULL, 0);
|
rqpair->cq = ibv_create_cq(rqpair->cm_id->verbs, rqpair->num_entries * 2, rqpair, NULL, 0);
|
||||||
if (!rqpair->cq) {
|
if (!rqpair->cq) {
|
||||||
@ -248,6 +255,13 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
|
||||||
|
if (rctrlr->hooks.get_ibv_pd) {
|
||||||
|
rctrlr->pd = rctrlr->hooks.get_ibv_pd(rctrlr->hook_ctx, rqpair->cm_id->verbs);
|
||||||
|
} else {
|
||||||
|
rctrlr->pd = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
|
memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
|
||||||
attr.qp_type = IBV_QPT_RC;
|
attr.qp_type = IBV_QPT_RC;
|
||||||
attr.send_cq = rqpair->cq;
|
attr.send_cq = rqpair->cq;
|
||||||
@ -257,11 +271,12 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
|
|||||||
attr.cap.max_send_sge = NVME_RDMA_DEFAULT_TX_SGE;
|
attr.cap.max_send_sge = NVME_RDMA_DEFAULT_TX_SGE;
|
||||||
attr.cap.max_recv_sge = NVME_RDMA_DEFAULT_RX_SGE;
|
attr.cap.max_recv_sge = NVME_RDMA_DEFAULT_RX_SGE;
|
||||||
|
|
||||||
rc = rdma_create_qp(rqpair->cm_id, NULL, &attr);
|
rc = rdma_create_qp(rqpair->cm_id, rctrlr->pd, &attr);
|
||||||
if (rc) {
|
if (rc) {
|
||||||
SPDK_ERRLOG("rdma_create_qp failed\n");
|
SPDK_ERRLOG("rdma_create_qp failed\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
rctrlr->pd = rqpair->cm_id->qp->pd;
|
||||||
|
|
||||||
rqpair->cm_id->context = &rqpair->qpair;
|
rqpair->cm_id->context = &rqpair->qpair;
|
||||||
|
|
||||||
@ -611,29 +626,38 @@ nvme_rdma_mr_map_notify(void *cb_ctx, struct spdk_mem_map *map,
|
|||||||
enum spdk_mem_map_notify_action action,
|
enum spdk_mem_map_notify_action action,
|
||||||
void *vaddr, size_t size)
|
void *vaddr, size_t size)
|
||||||
{
|
{
|
||||||
struct ibv_pd *pd = cb_ctx;
|
struct nvme_rdma_ctrlr *rctrlr = cb_ctx;
|
||||||
|
struct ibv_pd *pd;
|
||||||
struct ibv_mr *mr;
|
struct ibv_mr *mr;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
switch (action) {
|
switch (action) {
|
||||||
case SPDK_MEM_MAP_NOTIFY_REGISTER:
|
case SPDK_MEM_MAP_NOTIFY_REGISTER:
|
||||||
mr = ibv_reg_mr(pd, vaddr, size,
|
if (!rctrlr->hooks.get_rkey) {
|
||||||
IBV_ACCESS_LOCAL_WRITE |
|
pd = rctrlr->pd;
|
||||||
IBV_ACCESS_REMOTE_READ |
|
mr = ibv_reg_mr(pd, vaddr, size,
|
||||||
IBV_ACCESS_REMOTE_WRITE);
|
IBV_ACCESS_LOCAL_WRITE |
|
||||||
if (mr == NULL) {
|
IBV_ACCESS_REMOTE_READ |
|
||||||
SPDK_ERRLOG("ibv_reg_mr() failed\n");
|
IBV_ACCESS_REMOTE_WRITE);
|
||||||
return -EFAULT;
|
if (mr == NULL) {
|
||||||
|
SPDK_ERRLOG("ibv_reg_mr() failed\n");
|
||||||
|
return -EFAULT;
|
||||||
|
} else {
|
||||||
|
rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
|
rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
|
||||||
|
rctrlr->hooks.get_rkey(rctrlr->hook_ctx, vaddr, size));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
|
case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
|
||||||
mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
|
if (!rctrlr->hooks.get_rkey) {
|
||||||
rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
|
mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
|
||||||
if (mr) {
|
if (mr) {
|
||||||
ibv_dereg_mr(mr);
|
ibv_dereg_mr(mr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
SPDK_UNREACHABLE();
|
SPDK_UNREACHABLE();
|
||||||
@ -673,7 +697,8 @@ nvme_rdma_register_mem(struct nvme_rdma_qpair *rqpair)
|
|||||||
|
|
||||||
mr_map->ref = 1;
|
mr_map->ref = 1;
|
||||||
mr_map->pd = pd;
|
mr_map->pd = pd;
|
||||||
mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops, pd);
|
mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops,
|
||||||
|
nvme_rdma_ctrlr(rqpair->qpair.ctrlr));
|
||||||
if (mr_map->map == NULL) {
|
if (mr_map->map == NULL) {
|
||||||
SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
|
SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
|
||||||
free(mr_map);
|
free(mr_map);
|
||||||
@ -918,9 +943,21 @@ nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
|
|||||||
assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
|
assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
|
||||||
|
|
||||||
requested_size = req->payload_size;
|
requested_size = req->payload_size;
|
||||||
mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload,
|
if (!nvme_rdma_ctrlr(rqpair->qpair.ctrlr)->hooks.get_rkey) {
|
||||||
&requested_size);
|
|
||||||
if (mr == NULL || requested_size < req->payload_size) {
|
mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload,
|
||||||
|
&requested_size);
|
||||||
|
if (mr == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
req->cmd.dptr.sgl1.keyed.key = mr->rkey;
|
||||||
|
} else {
|
||||||
|
req->cmd.dptr.sgl1.keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
|
||||||
|
(uint64_t)payload,
|
||||||
|
&requested_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (requested_size < req->payload_size) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -937,7 +974,6 @@ nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
|
|||||||
req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
|
req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
|
||||||
req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
|
req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
|
||||||
req->cmd.dptr.sgl1.keyed.length = req->payload_size;
|
req->cmd.dptr.sgl1.keyed.length = req->payload_size;
|
||||||
req->cmd.dptr.sgl1.keyed.key = mr->rkey;
|
|
||||||
req->cmd.dptr.sgl1.address = (uint64_t)payload;
|
req->cmd.dptr.sgl1.address = (uint64_t)payload;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -977,17 +1013,27 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
|
|||||||
sge_length = spdk_min(remaining_size, sge_length);
|
sge_length = spdk_min(remaining_size, sge_length);
|
||||||
mr_length = sge_length;
|
mr_length = sge_length;
|
||||||
|
|
||||||
mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)virt_addr,
|
if (!nvme_rdma_ctrlr(rqpair->qpair.ctrlr)->hooks.get_rkey) {
|
||||||
&mr_length);
|
mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map,
|
||||||
|
(uint64_t)virt_addr,
|
||||||
|
&mr_length);
|
||||||
|
if (mr == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
|
||||||
|
} else {
|
||||||
|
cmd->sgl[num_sgl_desc].keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
|
||||||
|
(uint64_t)virt_addr,
|
||||||
|
&mr_length);
|
||||||
|
}
|
||||||
|
|
||||||
if (mr == NULL || mr_length < sge_length) {
|
if (mr_length < sge_length) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd->sgl[num_sgl_desc].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
|
cmd->sgl[num_sgl_desc].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
|
||||||
cmd->sgl[num_sgl_desc].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
|
cmd->sgl[num_sgl_desc].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
|
||||||
cmd->sgl[num_sgl_desc].keyed.length = sge_length;
|
cmd->sgl[num_sgl_desc].keyed.length = sge_length;
|
||||||
cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
|
|
||||||
cmd->sgl[num_sgl_desc].address = (uint64_t)virt_addr;
|
cmd->sgl[num_sgl_desc].address = (uint64_t)virt_addr;
|
||||||
|
|
||||||
remaining_size -= sge_length;
|
remaining_size -= sge_length;
|
||||||
@ -1017,11 +1063,11 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
|
|||||||
* the NVMe command. */
|
* the NVMe command. */
|
||||||
rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
|
rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
|
||||||
|
|
||||||
req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
|
req->cmd.dptr.sgl1.keyed.type = cmd->sgl[0].keyed.type;
|
||||||
req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
|
req->cmd.dptr.sgl1.keyed.subtype = cmd->sgl[0].keyed.subtype;
|
||||||
req->cmd.dptr.sgl1.keyed.length = req->payload_size;
|
req->cmd.dptr.sgl1.keyed.length = cmd->sgl[0].keyed.length;
|
||||||
req->cmd.dptr.sgl1.keyed.key = mr->rkey;
|
req->cmd.dptr.sgl1.keyed.key = cmd->sgl[0].keyed.key;
|
||||||
req->cmd.dptr.sgl1.address = rqpair->cmds[rdma_req->id].sgl[0].address;
|
req->cmd.dptr.sgl1.address = cmd->sgl[0].address;
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* Otherwise, The SGL descriptor embedded in the command must point to the list of
|
* Otherwise, The SGL descriptor embedded in the command must point to the list of
|
||||||
@ -1363,6 +1409,11 @@ struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_transpo
|
|||||||
|
|
||||||
nvme_ctrlr_init_cap(&rctrlr->ctrlr, &cap, &vs);
|
nvme_ctrlr_init_cap(&rctrlr->ctrlr, &cap, &vs);
|
||||||
|
|
||||||
|
if (g_nvme_hooks.get_ctx) {
|
||||||
|
rctrlr->hooks = g_nvme_hooks;
|
||||||
|
rctrlr->hook_ctx = rctrlr->hooks.get_ctx(&rctrlr->ctrlr.trid);
|
||||||
|
}
|
||||||
|
|
||||||
SPDK_DEBUGLOG(SPDK_LOG_NVME, "successfully initialized the nvmf ctrlr\n");
|
SPDK_DEBUGLOG(SPDK_LOG_NVME, "successfully initialized the nvmf ctrlr\n");
|
||||||
return &rctrlr->ctrlr;
|
return &rctrlr->ctrlr;
|
||||||
}
|
}
|
||||||
@ -1632,3 +1683,9 @@ nvme_rdma_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, siz
|
|||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks)
|
||||||
|
{
|
||||||
|
g_nvme_hooks = *hooks;
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user