rdma: remove compile time config for SRQ

Change-Id: I44af3ee4dc6ec76045e1d0614910402487098a3d
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/447120
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Seth Howell 2019-03-05 16:46:20 -07:00 committed by Jim Harris
parent cf467578f8
commit 01201d3e87
2 changed files with 98 additions and 107 deletions
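
In essence, the change replaces the `SPDK_CONFIG_RDMA_SRQ` compile-time switch with a runtime check on whether the poller owns a shared receive queue. A minimal, self-contained sketch of that pattern follows; the `toy_*` types and the `main()` driver are stand-ins invented for illustration, not SPDK code, and only the branch-on-`srq` logic mirrors the hunks below.

```c
#include <stddef.h>
#include <stdio.h>

/* Stand-ins for spdk_nvmf_rdma_poller / spdk_nvmf_rdma_qpair (illustration only). */
struct toy_poller { void *srq; };
struct toy_qpair  { struct toy_poller *poller; };

/* The receive path is no longer selected by a build flag; it is chosen per poller,
 * depending on whether an SRQ was created for the device. */
static void toy_post_recv(const struct toy_qpair *q)
{
	if (q->poller->srq == NULL) {
		printf("per-qpair receive queue (ibv_post_recv path)\n");
	} else {
		printf("shared receive queue (ibv_post_srq_recv path)\n");
	}
}

int main(void)
{
	struct toy_poller with_srq = { .srq = (void *)0x1 }, without_srq = { .srq = NULL };
	struct toy_qpair q1 = { .poller = &with_srq }, q2 = { .poller = &without_srq };

	toy_post_recv(&q1);
	toy_post_recv(&q2);
	return 0;
}
```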

@@ -38,6 +38,8 @@ block except for metadata.
### NVMe-oF Target
Support for per-device shared receive queues in the RDMA transport has been added.
It is enabled by default for any device that supports it.
The size of a shared receive queue is defined by the transport configuration file parameter
`MaxSRQDepth` and the `nvmf_create_transport` RPC method parameter `max_srq_depth`.
The default size is 4096.
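
For illustration, a minimal sketch of how that depth might be set, assuming the legacy INI-style transport section layout (the `[Transport]` section name and `Type` key are assumptions; only the `MaxSRQDepth` parameter name and the 4096 default come from the entry above):

```
[Transport]
  Type RDMA
  # Per-device shared receive queue depth (default 4096)
  MaxSRQDepth 4096
```

The same value can be supplied at runtime via the `max_srq_depth` parameter of the `nvmf_create_transport` RPC method.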

@@ -581,8 +581,6 @@ spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair,
return 0;
}
#ifndef SPDK_CONFIG_RDMA_SRQ
static void
nvmf_rdma_dump_request(struct spdk_nvmf_rdma_request *req)
{
@@ -608,8 +606,6 @@ nvmf_rdma_dump_qpair_contents(struct spdk_nvmf_rdma_qpair *rqpair)
}
}
#endif
static void
nvmf_rdma_resources_destroy(struct spdk_nvmf_rdma_resources *resources)
{
@@ -793,40 +789,37 @@ cleanup:
static void
spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
{
#ifdef SPDK_CONFIG_RDMA_SRQ
struct spdk_nvmf_rdma_recv *rdma_recv, *recv_tmp;
struct ibv_recv_wr *bad_recv_wr = NULL;
int rc;
#endif
spdk_trace_record(TRACE_RDMA_QP_DESTROY, 0, 0, (uintptr_t)rqpair->cm_id, 0);
spdk_poller_unregister(&rqpair->destruct_poller);
if (rqpair->qd != 0) {
#ifndef SPDK_CONFIG_RDMA_SRQ
nvmf_rdma_dump_qpair_contents(rqpair);
#endif
if (rqpair->poller->srq == NULL) {
nvmf_rdma_dump_qpair_contents(rqpair);
}
SPDK_WARNLOG("Destroying qpair when queue depth is %d\n", rqpair->qd);
}
if (rqpair->poller) {
TAILQ_REMOVE(&rqpair->poller->qpairs, rqpair, link);
}
#ifdef SPDK_CONFIG_RDMA_SRQ
/* Drop all received but unprocessed commands for this queue and return them to SRQ */
STAILQ_FOREACH_SAFE(rdma_recv, &rqpair->resources->incoming_queue, link, recv_tmp) {
if (rqpair == rdma_recv->qpair) {
STAILQ_REMOVE_HEAD(&rqpair->resources->incoming_queue, link);
rc = ibv_post_srq_recv(rqpair->poller->srq, &rdma_recv->wr, &bad_recv_wr);
if (rc) {
SPDK_ERRLOG("Unable to re-post rx descriptor\n");
if (rqpair->poller->srq != NULL) {
/* Drop all received but unprocessed commands for this queue and return them to SRQ */
STAILQ_FOREACH_SAFE(rdma_recv, &rqpair->resources->incoming_queue, link, recv_tmp) {
if (rqpair == rdma_recv->qpair) {
STAILQ_REMOVE_HEAD(&rqpair->resources->incoming_queue, link);
rc = ibv_post_srq_recv(rqpair->poller->srq, &rdma_recv->wr, &bad_recv_wr);
if (rc) {
SPDK_ERRLOG("Unable to re-post rx descriptor\n");
}
}
}
}
}
#endif
if (rqpair->cm_id) {
rdma_destroy_qp(rqpair->cm_id);
@@ -837,9 +830,9 @@ spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
}
}
#ifndef SPDK_CONFIG_RDMA_SRQ
nvmf_rdma_resources_destroy(rqpair->resources);
#endif
if (rqpair->poller != NULL && rqpair->poller->srq == NULL) {
nvmf_rdma_resources_destroy(rqpair->resources);
}
free(rqpair);
}
@@ -850,11 +843,9 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
struct spdk_nvmf_rdma_qpair *rqpair;
struct spdk_nvmf_rdma_poller *rpoller;
int rc, num_cqe, required_num_wr;
#ifndef SPDK_CONFIG_RDMA_SRQ
struct spdk_nvmf_rdma_transport *rtransport;
struct spdk_nvmf_transport *transport;
struct spdk_nvmf_rdma_resource_opts opts;
#endif
struct spdk_nvmf_rdma_device *device;
struct ibv_qp_init_attr ibv_init_attr;
@@ -866,15 +857,16 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
ibv_init_attr.qp_type = IBV_QPT_RC;
ibv_init_attr.send_cq = rqpair->poller->cq;
ibv_init_attr.recv_cq = rqpair->poller->cq;
#ifdef SPDK_CONFIG_RDMA_SRQ
ibv_init_attr.srq = rqpair->poller->srq;
#endif
if (rqpair->poller->srq) {
ibv_init_attr.srq = rqpair->poller->srq;
} else {
ibv_init_attr.cap.max_recv_wr = rqpair->max_queue_depth +
1; /* RECV operations + dummy drain WR */
}
ibv_init_attr.cap.max_send_wr = rqpair->max_queue_depth *
2 + 1; /* SEND, READ, and WRITE operations + dummy drain WR */
#ifndef SPDK_CONFIG_RDMA_SRQ
ibv_init_attr.cap.max_recv_wr = rqpair->max_queue_depth +
1; /* RECV operations + dummy drain WR */
#endif
ibv_init_attr.cap.max_send_sge = spdk_min(device->attr.max_sge, NVMF_DEFAULT_TX_SGE);
ibv_init_attr.cap.max_recv_sge = spdk_min(device->attr.max_sge, NVMF_DEFAULT_RX_SGE);
@@ -910,6 +902,8 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
rpoller->num_cqe = num_cqe;
}
rpoller->required_num_wr = required_num_wr;
rc = rdma_create_qp(rqpair->cm_id, rqpair->port->device->pd, &ibv_init_attr);
if (rc) {
SPDK_ERRLOG("rdma_create_qp failed: errno %d: %s\n", errno, spdk_strerror(errno));
@@ -919,8 +913,6 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
return -1;
}
rpoller->required_num_wr = required_num_wr;
rqpair->max_send_depth = spdk_min((uint32_t)(rqpair->max_queue_depth * 2 + 1),
ibv_init_attr.cap.max_send_wr);
rqpair->max_send_sge = spdk_min(NVMF_DEFAULT_TX_SGE, ibv_init_attr.cap.max_send_sge);
@@ -928,29 +920,29 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
spdk_trace_record(TRACE_RDMA_QP_CREATE, 0, 0, (uintptr_t)rqpair->cm_id, 0);
SPDK_DEBUGLOG(SPDK_LOG_RDMA, "New RDMA Connection: %p\n", qpair);
#ifndef SPDK_CONFIG_RDMA_SRQ
rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
transport = &rtransport->transport;
if (rqpair->poller->srq == NULL) {
rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
transport = &rtransport->transport;
opts.qp = rqpair->cm_id->qp;
opts.pd = rqpair->cm_id->pd;
opts.qpair = rqpair;
opts.shared = false;
opts.max_queue_depth = rqpair->max_queue_depth;
opts.in_capsule_data_size = transport->opts.in_capsule_data_size;
opts.qp = rqpair->cm_id->qp;
opts.pd = rqpair->cm_id->pd;
opts.qpair = rqpair;
opts.shared = false;
opts.max_queue_depth = rqpair->max_queue_depth;
opts.in_capsule_data_size = transport->opts.in_capsule_data_size;
rqpair->resources = nvmf_rdma_resources_create(&opts);
rqpair->resources = nvmf_rdma_resources_create(&opts);
if (!rqpair->resources) {
SPDK_ERRLOG("Unable to allocate resources for receive queue.\n");
rdma_destroy_id(rqpair->cm_id);
rqpair->cm_id = NULL;
spdk_nvmf_rdma_qpair_destroy(rqpair);
return -1;
if (!rqpair->resources) {
SPDK_ERRLOG("Unable to allocate resources for receive queue.\n");
rdma_destroy_id(rqpair->cm_id);
rqpair->cm_id = NULL;
spdk_nvmf_rdma_qpair_destroy(rqpair);
return -1;
}
} else {
rqpair->resources = rqpair->poller->resources;
}
#else
rqpair->resources = rqpair->poller->resources;
#endif
rqpair->current_recv_depth = 0;
STAILQ_INIT(&rqpair->pending_rdma_read_queue);
@@ -1016,11 +1008,13 @@ request_transfer_out(struct spdk_nvmf_request *req, int *data_posted)
assert(rdma_req->recv != NULL);
SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA RECV POSTED. Recv: %p Connection: %p\n", rdma_req->recv,
rqpair);
#ifndef SPDK_CONFIG_RDMA_SRQ
rc = ibv_post_recv(rqpair->cm_id->qp, &rdma_req->recv->wr, &bad_recv_wr);
#else
rc = ibv_post_srq_recv(rqpair->poller->srq, &rdma_req->recv->wr, &bad_recv_wr);
#endif
if (rqpair->poller->srq == NULL) {
rc = ibv_post_recv(rqpair->cm_id->qp, &rdma_req->recv->wr, &bad_recv_wr);
} else {
rdma_req->recv->qpair = NULL;
rc = ibv_post_srq_recv(rqpair->poller->srq, &rdma_req->recv->wr, &bad_recv_wr);
}
if (rc) {
SPDK_ERRLOG("Unable to re-post rx descriptor\n");
return rc;
@@ -2267,12 +2261,14 @@ spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport
STAILQ_REMOVE_HEAD(&resources->free_queue, state_link);
rdma_req->recv = STAILQ_FIRST(&resources->incoming_queue);
STAILQ_REMOVE_HEAD(&resources->incoming_queue, link);
#ifdef SPDK_CONFIG_RDMA_SRQ
rdma_req->req.qpair = &rdma_req->recv->qpair->qpair;
rdma_req->recv->qpair->qd++;
#else
rqpair->qd++;
#endif
if (rqpair->poller->srq != NULL) {
rdma_req->req.qpair = &rdma_req->recv->qpair->qpair;
rdma_req->recv->qpair->qd++;
} else {
rqpair->qd++;
}
rdma_req->state = RDMA_REQUEST_STATE_NEW;
if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
break;
@@ -2593,12 +2589,9 @@ spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport)
struct spdk_nvmf_rdma_poll_group *rgroup;
struct spdk_nvmf_rdma_poller *poller;
struct spdk_nvmf_rdma_device *device;
#ifdef SPDK_CONFIG_RDMA_SRQ
struct ibv_srq_init_attr srq_init_attr;
struct spdk_nvmf_rdma_resource_opts opts;
#endif
rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
rgroup = calloc(1, sizeof(*rgroup));
@@ -2634,35 +2627,34 @@ spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport)
poller->num_cqe = DEFAULT_NVMF_RDMA_CQ_SIZE;
TAILQ_INSERT_TAIL(&rgroup->pollers, poller, link);
if (device->attr.max_srq != 0) {
poller->max_srq_depth = transport->opts.max_srq_depth;
#ifdef SPDK_CONFIG_RDMA_SRQ
poller->max_srq_depth = transport->opts.max_srq_depth;
memset(&srq_init_attr, 0, sizeof(struct ibv_srq_init_attr));
srq_init_attr.attr.max_wr = poller->max_srq_depth;
srq_init_attr.attr.max_sge = spdk_min(device->attr.max_sge, NVMF_DEFAULT_RX_SGE);
poller->srq = ibv_create_srq(device->pd, &srq_init_attr);
if (!poller->srq) {
SPDK_ERRLOG("Unable to create shared receive queue, errno %d\n", errno);
spdk_nvmf_rdma_poll_group_destroy(&rgroup->group);
pthread_mutex_unlock(&rtransport->lock);
return NULL;
}
memset(&srq_init_attr, 0, sizeof(struct ibv_srq_init_attr));
srq_init_attr.attr.max_wr = poller->max_srq_depth;
srq_init_attr.attr.max_sge = spdk_min(device->attr.max_sge, NVMF_DEFAULT_RX_SGE);
poller->srq = ibv_create_srq(device->pd, &srq_init_attr);
if (!poller->srq) {
SPDK_ERRLOG("Unable to create shared receive queue, errno %d\n", errno);
spdk_nvmf_rdma_poll_group_destroy(&rgroup->group);
pthread_mutex_unlock(&rtransport->lock);
return NULL;
opts.qp = poller->srq;
opts.pd = device->pd;
opts.qpair = NULL;
opts.shared = true;
opts.max_queue_depth = poller->max_srq_depth;
opts.in_capsule_data_size = transport->opts.in_capsule_data_size;
poller->resources = nvmf_rdma_resources_create(&opts);
if (!poller->resources) {
SPDK_ERRLOG("Unable to allocate resources for shared receive queue.\n");
spdk_nvmf_rdma_poll_group_destroy(&rgroup->group);
pthread_mutex_unlock(&rtransport->lock);
}
}
opts.qp = poller->srq;
opts.pd = device->pd;
opts.qpair = NULL;
opts.shared = true;
opts.max_queue_depth = poller->max_srq_depth;
opts.in_capsule_data_size = transport->opts.in_capsule_data_size;
poller->resources = nvmf_rdma_resources_create(&opts);
if (!poller->resources) {
SPDK_ERRLOG("Unable to allocate resources for shared receive queue.\n");
spdk_nvmf_rdma_poll_group_destroy(&rgroup->group);
pthread_mutex_unlock(&rtransport->lock);
}
#endif
}
pthread_mutex_unlock(&rtransport->lock);
@@ -2685,15 +2677,12 @@ spdk_nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) {
TAILQ_REMOVE(&rgroup->pollers, poller, link);
#ifdef SPDK_CONFIG_RDMA_SRQ
if (poller->srq) {
nvmf_rdma_resources_destroy(poller->resources);
ibv_destroy_srq(poller->srq);
SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Destroyed RDMA shared queue %p\n", poller->srq);
}
nvmf_rdma_resources_destroy(poller->resources);
#endif
if (poller->cq) {
ibv_destroy_cq(poller->cq);
}
@@ -2841,7 +2830,6 @@ spdk_nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair)
NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US);
}
#ifdef SPDK_CONFIG_RDMA_SRQ
static struct spdk_nvmf_rdma_qpair *
get_rdma_qpair_from_wc(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_wc *wc)
{
@@ -2855,7 +2843,6 @@ get_rdma_qpair_from_wc(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_wc *wc)
SPDK_ERRLOG("Didn't find QP with qp_num %u\n", wc->qp_num);
return NULL;
}
#endif
#ifdef DEBUG
static int
@@ -2913,13 +2900,15 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
spdk_nvmf_rdma_request_process(rtransport, rdma_req);
break;
case RDMA_WR_TYPE_RECV:
/* rdma_recv->qpair will be NULL if using an SRQ. In that case we have to get the qpair from the wc. */
rdma_recv = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr);
#ifdef SPDK_CONFIG_RDMA_SRQ
rdma_recv->qpair = get_rdma_qpair_from_wc(rpoller, &wc[i]);
assert(rdma_recv->qpair != NULL);
#endif
if (rdma_recv->qpair == NULL) {
rdma_recv->qpair = get_rdma_qpair_from_wc(rpoller, &wc[i]);
}
rqpair = rdma_recv->qpair;
assert(rqpair != NULL);
/* Dump this into the incoming queue. This gets cleaned up when
* the queue pair disconnects or recovers. */
STAILQ_INSERT_TAIL(&rqpair->resources->incoming_queue, rdma_recv, link);
@@ -3010,11 +2999,11 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
case IBV_WC_RECV:
assert(rdma_wr->type == RDMA_WR_TYPE_RECV);
/* rdma_recv->qpair will be NULL if using an SRQ. In that case we have to get the qpair from the wc. */
rdma_recv = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr);
#ifdef SPDK_CONFIG_RDMA_SRQ
rdma_recv->qpair = get_rdma_qpair_from_wc(rpoller, &wc[i]);
assert(rdma_recv->qpair != NULL);
#endif
if (rdma_recv->qpair == NULL) {
rdma_recv->qpair = get_rdma_qpair_from_wc(rpoller, &wc[i]);
}
rqpair = rdma_recv->qpair;
/* The qpair should not send more requests than are allowed per qpair. */
if (rqpair->current_recv_depth >= rqpair->max_queue_depth) {