nvme_rdma: share the cm_event channel between qpairs.

This enables us to create a single file descriptor and a single event
channel to poll for completions. With that accomplished, we can easily
poll for events on the admin qpair each time we check it for
completions.

Change-Id: I8b901252510744a956bef12594d1e045715e002e
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/467549
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Seth Howell 2019-09-05 15:20:56 -07:00 committed by Jim Harris
parent f12e6bc041
commit 5ac814e36c

View File

@ -63,9 +63,12 @@
#define NVME_RDMA_DEFAULT_TX_SGE 2
#define NVME_RDMA_DEFAULT_RX_SGE 1
/* Max number of NVMe-oF SGL descriptors supported by the host */
#define NVME_RDMA_MAX_SGL_DESCRIPTORS 16
/* number of STAILQ entries for holding pending RDMA CM events. */
#define NVME_RDMA_NUM_CM_EVENTS 256
struct spdk_nvmf_cmd {
struct spdk_nvme_cmd cmd;
struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS];
@ -81,6 +84,12 @@ struct spdk_nvme_rdma_mr_map {
LIST_ENTRY(spdk_nvme_rdma_mr_map) link;
};
/* STAILQ wrapper for cm events. */
struct nvme_rdma_cm_event_entry {
struct rdma_cm_event *evt;
STAILQ_ENTRY(nvme_rdma_cm_event_entry) link;
};
/* NVMe RDMA transport extensions for spdk_nvme_ctrlr */
struct nvme_rdma_ctrlr {
struct spdk_nvme_ctrlr ctrlr;
@ -88,6 +97,14 @@ struct nvme_rdma_ctrlr {
struct ibv_pd *pd;
uint16_t max_sge;
struct rdma_event_channel *cm_channel;
STAILQ_HEAD(, nvme_rdma_cm_event_entry) pending_cm_events;
STAILQ_HEAD(, nvme_rdma_cm_event_entry) free_cm_events;
struct nvme_rdma_cm_event_entry *cm_events;
};
/* NVMe RDMA qpair extensions for spdk_nvme_qpair */
@ -130,9 +147,7 @@ struct nvme_rdma_qpair {
TAILQ_HEAD(, spdk_nvme_rdma_req) outstanding_reqs;
/* Placed at the end of the struct since it is not used frequently */
struct rdma_event_channel *cm_channel;
struct rdma_cm_event *evt;
struct rdma_cm_event *evt;
};
struct spdk_nvme_rdma_req {
@ -282,39 +297,92 @@ nvme_rdma_qpair_process_cm_event(struct nvme_rdma_qpair *rqpair)
return rc;
}
/*
* This function must be called under the nvme controller's lock
* because it touches global controller variables. The lock is taken
* by the generic transport code before invoking a few of the functions
* in this file: nvme_rdma_ctrlr_connect_qpair, nvme_rdma_ctrlr_delete_io_qpair,
* and conditionally nvme_rdma_qpair_process_completions when it is calling
* completions on the admin qpair. When adding a new call to this function, please
* verify that it is in a situation where it falls under the lock.
*/
static int
nvme_rdma_poll_events(struct nvme_rdma_ctrlr *rctrlr)
{
struct nvme_rdma_cm_event_entry *entry, *tmp;
struct nvme_rdma_qpair *event_qpair;
struct rdma_cm_event *event;
struct rdma_event_channel *channel = rctrlr->cm_channel;
STAILQ_FOREACH_SAFE(entry, &rctrlr->pending_cm_events, link, tmp) {
event_qpair = nvme_rdma_qpair(entry->evt->id->context);
if (event_qpair->evt == NULL) {
event_qpair->evt = entry->evt;
STAILQ_REMOVE(&rctrlr->pending_cm_events, entry, nvme_rdma_cm_event_entry, link);
STAILQ_INSERT_HEAD(&rctrlr->free_cm_events, entry, link);
}
}
while (rdma_get_cm_event(channel, &event) == 0) {
event_qpair = nvme_rdma_qpair(event->id->context);
if (event_qpair->evt == NULL) {
event_qpair->evt = event;
} else {
assert(rctrlr == nvme_rdma_ctrlr(event_qpair->qpair.ctrlr));
entry = STAILQ_FIRST(&rctrlr->free_cm_events);
if (entry == NULL) {
rdma_ack_cm_event(event);
return -ENOMEM;
}
STAILQ_REMOVE(&rctrlr->free_cm_events, entry, nvme_rdma_cm_event_entry, link);
entry->evt = event;
STAILQ_INSERT_TAIL(&rctrlr->pending_cm_events, entry, link);
}
}
if (errno == EAGAIN || errno == EWOULDBLOCK) {
return 0;
} else {
return errno;
}
}
static int
nvme_rdma_process_event(struct nvme_rdma_qpair *rqpair,
struct rdma_event_channel *channel,
enum rdma_cm_event_type evt)
{
struct rdma_cm_event *event;
int rc = 0;
struct nvme_rdma_ctrlr *rctrlr;
int rc = 0, rc2;
if (rqpair->evt != NULL) {
return -EINVAL;
}
while (rdma_get_cm_event(channel, &event) != 0) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
continue;
rc = nvme_rdma_qpair_process_cm_event(rqpair);
if (rc) {
return rc;
}
SPDK_ERRLOG("Failed to get event from CM event channel. Error %d (%s)\n",
errno, spdk_strerror(errno));
return -errno;
}
if (event->event != evt) {
rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
assert(rctrlr != NULL);
while (!rqpair->evt && !rc) {
rc = nvme_rdma_poll_events(rctrlr);
}
if (rc) {
return rc;
}
if (rqpair->evt->event != evt) {
SPDK_ERRLOG("Expected %s but received %s (%d) from CM event channel (status = %d)\n",
nvme_rdma_cm_event_str_get(evt),
nvme_rdma_cm_event_str_get(event->event), event->event,
event->status);
nvme_rdma_cm_event_str_get(rqpair->evt->event), rqpair->evt->event,
rqpair->evt->status);
rc = -EBADMSG;
}
rqpair->evt = event;
rc |= nvme_rdma_qpair_process_cm_event(rqpair);
return rc;
rc2 = nvme_rdma_qpair_process_cm_event(rqpair);
/* bad message takes precedence over the other error codes from processing the event. */
return rc == 0 ? rc2 : rc;
}
static int
@ -652,6 +720,7 @@ nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
struct ibv_device_attr attr;
int ret;
struct spdk_nvme_ctrlr *ctrlr;
struct nvme_rdma_ctrlr *rctrlr;
ret = ibv_query_device(rqpair->cm_id->verbs, &attr);
if (ret != 0) {
@ -665,6 +734,8 @@ nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
if (!ctrlr) {
return -1;
}
rctrlr = nvme_rdma_ctrlr(ctrlr);
assert(rctrlr != NULL);
request_data.qid = rqpair->qpair.id;
request_data.hrqsize = rqpair->num_entries;
@ -682,7 +753,7 @@ nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
return ret;
}
ret = nvme_rdma_process_event(rqpair, rqpair->cm_channel, RDMA_CM_EVENT_ESTABLISHED);
ret = nvme_rdma_process_event(rqpair, rctrlr->cm_channel, RDMA_CM_EVENT_ESTABLISHED);
if (ret) {
SPDK_ERRLOG("RDMA connect error\n");
return -1;
@ -850,22 +921,12 @@ nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
bool src_addr_specified;
int rc;
struct spdk_nvme_ctrlr *ctrlr;
struct nvme_rdma_ctrlr *rctrlr;
int family;
int flag;
rqpair->cm_channel = rdma_create_event_channel();
if (rqpair->cm_channel == NULL) {
SPDK_ERRLOG("rdma_create_event_channel() failed\n");
return -1;
}
flag = fcntl(rqpair->cm_channel->fd, F_GETFL);
if (fcntl(rqpair->cm_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) {
SPDK_ERRLOG("Cannot set event channel to non blocking\n");
return -1;
}
ctrlr = rqpair->qpair.ctrlr;
rctrlr = nvme_rdma_ctrlr(ctrlr);
assert(rctrlr != NULL);
switch (ctrlr->trid.adrfam) {
case SPDK_NVMF_ADRFAM_IPV4:
@ -902,7 +963,7 @@ nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
src_addr_specified = false;
}
rc = rdma_create_id(rqpair->cm_channel, &rqpair->cm_id, rqpair, RDMA_PS_TCP);
rc = rdma_create_id(rctrlr->cm_channel, &rqpair->cm_id, rqpair, RDMA_PS_TCP);
if (rc < 0) {
SPDK_ERRLOG("rdma_create_id() failed\n");
return -1;
@ -910,7 +971,7 @@ nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
rc = nvme_rdma_resolve_addr(rqpair,
src_addr_specified ? (struct sockaddr *)&src_addr : NULL,
(struct sockaddr *)&dst_addr, rqpair->cm_channel);
(struct sockaddr *)&dst_addr, rctrlr->cm_channel);
if (rc < 0) {
SPDK_ERRLOG("nvme_rdma_resolve_addr() failed\n");
return -1;
@ -1409,11 +1470,6 @@ nvme_rdma_qpair_disconnect(struct spdk_nvme_qpair *qpair)
ibv_destroy_cq(rqpair->cq);
rqpair->cq = NULL;
}
if (rqpair->cm_channel) {
rdma_destroy_event_channel(rqpair->cm_channel);
rqpair->cm_channel = NULL;
}
}
static int
@ -1528,7 +1584,7 @@ struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_transpo
union spdk_nvme_vs_register vs;
struct ibv_context **contexts;
struct ibv_device_attr dev_attr;
int i, rc;
int i, flag, rc;
rctrlr = calloc(1, sizeof(struct nvme_rdma_ctrlr));
if (rctrlr == NULL) {
@ -1570,6 +1626,33 @@ struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_transpo
return NULL;
}
STAILQ_INIT(&rctrlr->pending_cm_events);
STAILQ_INIT(&rctrlr->free_cm_events);
rctrlr->cm_events = calloc(NVME_RDMA_NUM_CM_EVENTS, sizeof(*rctrlr->cm_events));
if (rctrlr->cm_events == NULL) {
SPDK_ERRLOG("unable to allocat buffers to hold CM events.\n");
nvme_rdma_ctrlr_destruct(&rctrlr->ctrlr);
return NULL;
}
for (i = 0; i < NVME_RDMA_NUM_CM_EVENTS; i++) {
STAILQ_INSERT_TAIL(&rctrlr->free_cm_events, &rctrlr->cm_events[i], link);
}
rctrlr->cm_channel = rdma_create_event_channel();
if (rctrlr->cm_channel == NULL) {
SPDK_ERRLOG("rdma_create_event_channel() failed\n");
nvme_rdma_ctrlr_destruct(&rctrlr->ctrlr);
return NULL;
}
flag = fcntl(rctrlr->cm_channel->fd, F_GETFL);
if (fcntl(rctrlr->cm_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) {
SPDK_ERRLOG("Cannot set event channel to non blocking\n");
nvme_rdma_ctrlr_destruct(&rctrlr->ctrlr);
return NULL;
}
rctrlr->ctrlr.adminq = nvme_rdma_ctrlr_create_qpair(&rctrlr->ctrlr, 0,
SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES, 0, SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES);
if (!rctrlr->ctrlr.adminq) {
@ -1606,11 +1689,25 @@ int
nvme_rdma_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
struct nvme_rdma_ctrlr *rctrlr = nvme_rdma_ctrlr(ctrlr);
struct nvme_rdma_cm_event_entry *entry;
if (ctrlr->adminq) {
nvme_rdma_qpair_destroy(ctrlr->adminq);
}
STAILQ_FOREACH(entry, &rctrlr->pending_cm_events, link) {
rdma_ack_cm_event(entry->evt);
}
STAILQ_INIT(&rctrlr->free_cm_events);
STAILQ_INIT(&rctrlr->pending_cm_events);
free(rctrlr->cm_events);
if (rctrlr->cm_channel) {
rdma_destroy_event_channel(rctrlr->cm_channel);
rctrlr->cm_channel = NULL;
}
nvme_ctrlr_destruct_finish(ctrlr);
free(rctrlr);
@ -1780,6 +1877,7 @@ nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair,
uint32_t reaped;
struct ibv_cq *cq;
struct spdk_nvme_rdma_req *rdma_req;
struct nvme_rdma_ctrlr *rctrlr;
if (max_completions == 0) {
max_completions = rqpair->num_entries;
@ -1787,6 +1885,12 @@ nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair,
max_completions = spdk_min(max_completions, rqpair->num_entries);
}
if (nvme_qpair_is_admin_queue(&rqpair->qpair)) {
rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
nvme_rdma_poll_events(rctrlr);
}
nvme_rdma_qpair_process_cm_event(rqpair);
cq = rqpair->cq;
reaped = 0;