nvmf/rdma: Create one cq per thread instead of per connection
This greatly improves efficiency when the target is scaled to many connections. All connections handled by a given thread can now be drained with a single completion queue poll, O(1) in the number of connections, whereas previously each connection's CQ had to be polled separately, O(n).

Change-Id: I9f695f68093d73e6538df416b0f1aabef07119ff
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/391491
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
commit 2a0772e3b8
parent 7b397f6281
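For readers skimming the diff below, here is a minimal sketch of the polling model before and after this change. It is not the SPDK code itself: the conn and poller structs are hypothetical stand-ins for spdk_nvmf_rdma_qpair and spdk_nvmf_rdma_poller, and only ibv_poll_cq() is the real verbs call. The point is that the old model issued one CQ poll per connection per pass, while the new model drains one shared CQ per thread and recovers the owning queue pair from each completion's wr_id, which is why the diff threads a qpair back-pointer through spdk_nvmf_rdma_recv.

/* Sketch only: hypothetical structs standing in for the SPDK equivalents. */
#include <infiniband/verbs.h>
#include <stddef.h>

struct conn {                    /* stand-in for spdk_nvmf_rdma_qpair (old model) */
	struct ibv_cq *cq;       /* one CQ per connection */
	struct conn   *next;
};

struct poller {                  /* stand-in for spdk_nvmf_rdma_poller (new model) */
	struct ibv_cq *cq;       /* one CQ shared by all connections on this thread */
};

/* Old model: poll cost grows with the number of connections, even idle ones. */
static int poll_old(struct conn *conns)
{
	struct ibv_wc wc[32];
	int total = 0;

	for (struct conn *c = conns; c != NULL; c = c->next) {
		int n = ibv_poll_cq(c->cq, 32, wc);  /* one verbs call per connection */
		if (n < 0) {
			return n;
		}
		total += n;
	}
	return total;
}

/* New model: one verbs call drains completions for every connection on the
 * thread. Each completion carries a wr_id pointing back at the request/recv
 * object, which in turn knows its owning queue pair. */
static int poll_new(struct poller *p)
{
	struct ibv_wc wc[32];
	int n = ibv_poll_cq(p->cq, 32, wc);

	if (n < 0) {
		return n;
	}
	for (int i = 0; i < n; i++) {
		/* recover the per-connection context from wc[i].wr_id here */
		(void)wc[i];
	}
	return n;
}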
@@ -56,6 +56,9 @@
 #define NVMF_DEFAULT_TX_SGE 1
 #define NVMF_DEFAULT_RX_SGE 2
 
+/* The RDMA completion queue size */
+#define NVMF_RDMA_CQ_SIZE 4096
+
 /* AIO backend requires block size aligned data buffers,
  * extra 4KiB aligned data buffer should work for most devices.
  */
@@ -116,6 +119,8 @@ struct spdk_nvmf_rdma_recv {
 	struct ibv_recv_wr wr;
 	struct ibv_sge sgl[NVMF_DEFAULT_RX_SGE];
 
+	struct spdk_nvmf_rdma_qpair *qpair;
+
 	/* In-capsule data buffer */
 	uint8_t *buf;
 
@@ -150,7 +155,6 @@ struct spdk_nvmf_rdma_qpair {
 	struct spdk_nvmf_rdma_poller *poller;
 
 	struct rdma_cm_id *cm_id;
-	struct ibv_cq *cq;
 
 	/* The maximum number of I/O outstanding on this connection at one time */
 	uint16_t max_queue_depth;
@@ -213,6 +217,8 @@ struct spdk_nvmf_rdma_poller {
 	struct spdk_nvmf_rdma_device *device;
 	struct spdk_nvmf_rdma_poll_group *group;
 
+	struct ibv_cq *cq;
+
 	TAILQ_HEAD(, spdk_nvmf_rdma_qpair) qpairs;
 
 	TAILQ_ENTRY(spdk_nvmf_rdma_poller) link;
@@ -308,10 +314,6 @@ spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
 		rdma_destroy_id(rqpair->cm_id);
 	}
 
-	if (rqpair->cq) {
-		ibv_destroy_cq(rqpair->cq);
-	}
-
 	if (rqpair->mgmt_channel) {
 		spdk_put_io_channel(rqpair->mgmt_channel);
 	}
@@ -339,20 +341,10 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
 	rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
 	rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
 
-	rqpair->cq = ibv_create_cq(rqpair->cm_id->verbs, rqpair->max_queue_depth * 3, rqpair, NULL, 0);
-	if (!rqpair->cq) {
-		spdk_strerror_r(errno, buf, sizeof(buf));
-		SPDK_ERRLOG("Unable to create completion queue\n");
-		SPDK_ERRLOG("Errno %d: %s\n", errno, buf);
-		rdma_destroy_id(rqpair->cm_id);
-		spdk_nvmf_rdma_qpair_destroy(rqpair);
-		return -1;
-	}
-
 	memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
 	attr.qp_type = IBV_QPT_RC;
-	attr.send_cq = rqpair->cq;
-	attr.recv_cq = rqpair->cq;
+	attr.send_cq = rqpair->poller->cq;
+	attr.recv_cq = rqpair->poller->cq;
 	attr.cap.max_send_wr = rqpair->max_queue_depth * 2; /* SEND, READ, and WRITE operations */
 	attr.cap.max_recv_wr = rqpair->max_queue_depth; /* RECV operations */
 	attr.cap.max_send_sge = NVMF_DEFAULT_TX_SGE;
@@ -411,6 +403,7 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
 		struct ibv_recv_wr *bad_wr = NULL;
 
 		rdma_recv = &rqpair->recvs[i];
+		rdma_recv->qpair = rqpair;
 
 		/* Set up memory to receive commands */
 		rdma_recv->buf = (void *)((uintptr_t)rqpair->bufs + (i * rtransport->in_capsule_data_size));
@@ -1393,10 +1386,6 @@ spdk_nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport,
 	return 0;
 }
 
-static int
-spdk_nvmf_rdma_qpair_poll(struct spdk_nvmf_rdma_transport *rtransport,
-			  struct spdk_nvmf_rdma_qpair *rqpair);
-
 static void
 spdk_nvmf_rdma_accept(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn)
 {
@@ -1511,6 +1500,15 @@ spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport)
 
 		TAILQ_INIT(&poller->qpairs);
 
+		poller->cq = ibv_create_cq(device->context, NVMF_RDMA_CQ_SIZE, poller, NULL, 0);
+		if (!poller->cq) {
+			SPDK_ERRLOG("Unable to create completion queue\n");
+			free(poller);
+			free(rgroup);
+			pthread_mutex_unlock(&rtransport->lock);
+			return NULL;
+		}
+
 		TAILQ_INSERT_TAIL(&rgroup->pollers, poller, link);
 	}
 
@@ -1532,6 +1530,11 @@ spdk_nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
 
 	TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) {
 		TAILQ_REMOVE(&rgroup->pollers, poller, link);
+
+		if (poller->cq) {
+			ibv_destroy_cq(poller->cq);
+		}
+
 		free(poller);
 	}
 
@@ -1695,22 +1698,26 @@ spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport
 }
 
 static struct spdk_nvmf_rdma_request *
-get_rdma_req_from_wc(struct spdk_nvmf_rdma_qpair *rqpair,
-		     struct ibv_wc *wc)
+get_rdma_req_from_wc(struct ibv_wc *wc)
 {
 	struct spdk_nvmf_rdma_request *rdma_req;
 
 	rdma_req = (struct spdk_nvmf_rdma_request *)wc->wr_id;
 	assert(rdma_req != NULL);
 
+#ifdef DEBUG
+	struct spdk_nvmf_rdma_qpair *rqpair;
+	rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
 	assert(rdma_req - rqpair->reqs >= 0);
 	assert(rdma_req - rqpair->reqs < (ptrdiff_t)rqpair->max_queue_depth);
+#endif
 
 	return rdma_req;
 }
 
 static struct spdk_nvmf_rdma_recv *
-get_rdma_recv_from_wc(struct spdk_nvmf_rdma_qpair *rqpair,
-		      struct ibv_wc *wc)
+get_rdma_recv_from_wc(struct ibv_wc *wc)
 {
 	struct spdk_nvmf_rdma_recv *rdma_recv;
 
@@ -1718,26 +1725,32 @@ get_rdma_recv_from_wc(struct spdk_nvmf_rdma_qpair *rqpair,
 
 	rdma_recv = (struct spdk_nvmf_rdma_recv *)wc->wr_id;
 	assert(rdma_recv != NULL);
 
+#ifdef DEBUG
+	struct spdk_nvmf_rdma_qpair *rqpair = rdma_recv->qpair;
+
 	assert(rdma_recv - rqpair->recvs >= 0);
 	assert(rdma_recv - rqpair->recvs < (ptrdiff_t)rqpair->max_queue_depth);
+#endif
 
 	return rdma_recv;
 }
 
 static int
-spdk_nvmf_rdma_qpair_poll(struct spdk_nvmf_rdma_transport *rtransport,
-			  struct spdk_nvmf_rdma_qpair *rqpair)
+spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
+			   struct spdk_nvmf_rdma_poller *rpoller)
 {
 	struct ibv_wc wc[32];
 	struct spdk_nvmf_rdma_request *rdma_req;
 	struct spdk_nvmf_rdma_recv *rdma_recv;
+	struct spdk_nvmf_rdma_qpair *rqpair;
 	int reaped, i;
 	int count = 0;
 	bool error = false;
 	char buf[64];
 
 	/* Poll for completing operations. */
-	reaped = ibv_poll_cq(rqpair->cq, 32, wc);
+	reaped = ibv_poll_cq(rpoller->cq, 32, wc);
 	if (reaped < 0) {
 		spdk_strerror_r(errno, buf, sizeof(buf));
 		SPDK_ERRLOG("Error polling CQ! (%d): %s\n",
@@ -1748,14 +1761,15 @@ spdk_nvmf_rdma_qpair_poll(struct spdk_nvmf_rdma_transport *rtransport,
 	for (i = 0; i < reaped; i++) {
 		if (wc[i].status) {
 			SPDK_ERRLOG("CQ error on CQ %p, Request 0x%lu (%d): %s\n",
-				    rqpair->cq, wc[i].wr_id, wc[i].status, ibv_wc_status_str(wc[i].status));
+				    rpoller->cq, wc[i].wr_id, wc[i].status, ibv_wc_status_str(wc[i].status));
 			error = true;
 			continue;
 		}
 
 		switch (wc[i].opcode) {
 		case IBV_WC_SEND:
-			rdma_req = get_rdma_req_from_wc(rqpair, &wc[i]);
+			rdma_req = get_rdma_req_from_wc(&wc[i]);
+			rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
 
 			assert(rdma_req->state == RDMA_REQUEST_STATE_COMPLETING);
 			rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
@@ -1769,6 +1783,9 @@ spdk_nvmf_rdma_qpair_poll(struct spdk_nvmf_rdma_transport *rtransport,
 			break;
 
 		case IBV_WC_RDMA_WRITE:
+			rdma_req = get_rdma_req_from_wc(&wc[i]);
+			rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
+
 			rqpair->cur_rdma_rw_depth--;
 
 			/* Try to process other queued requests */
@@ -1776,7 +1793,8 @@ spdk_nvmf_rdma_qpair_poll(struct spdk_nvmf_rdma_transport *rtransport,
 			break;
 
 		case IBV_WC_RDMA_READ:
-			rdma_req = get_rdma_req_from_wc(rqpair, &wc[i]);
+			rdma_req = get_rdma_req_from_wc(&wc[i]);
+			rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
 
 			assert(rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
 			rqpair->cur_rdma_rw_depth--;
@@ -1789,7 +1807,8 @@ spdk_nvmf_rdma_qpair_poll(struct spdk_nvmf_rdma_transport *rtransport,
 			break;
 
 		case IBV_WC_RECV:
-			rdma_recv = get_rdma_recv_from_wc(rqpair, &wc[i]);
+			rdma_recv = get_rdma_recv_from_wc(&wc[i]);
+			rqpair = rdma_recv->qpair;
 
 			TAILQ_INSERT_TAIL(&rqpair->incoming_queue, rdma_recv, link);
 
@@ -1816,7 +1835,6 @@ spdk_nvmf_rdma_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
 	struct spdk_nvmf_rdma_transport *rtransport;
 	struct spdk_nvmf_rdma_poll_group *rgroup;
 	struct spdk_nvmf_rdma_poller *rpoller;
-	struct spdk_nvmf_rdma_qpair *rqpair;
 	int count, rc;
 
 	rtransport = SPDK_CONTAINEROF(group->transport, struct spdk_nvmf_rdma_transport, transport);
@@ -1824,14 +1842,12 @@ spdk_nvmf_rdma_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
 
 	count = 0;
 	TAILQ_FOREACH(rpoller, &rgroup->pollers, link) {
-		TAILQ_FOREACH(rqpair, &rpoller->qpairs, link) {
-			rc = spdk_nvmf_rdma_qpair_poll(rtransport, rqpair);
+		rc = spdk_nvmf_rdma_poller_poll(rtransport, rpoller);
 		if (rc < 0) {
 			return rc;
 		}
 		count += rc;
-		}
 	}
 
 	return count;
 }