@@ -296,11 +296,13 @@ struct spdk_nvmf_rdma_qpair {
 	/* The maximum number of SGEs per WR on the recv queue */
 	uint32_t				max_recv_sge;
 
+#ifndef SPDK_CONFIG_RDMA_SRQ
 	/* Receives that are waiting for a request object */
 	STAILQ_HEAD(, spdk_nvmf_rdma_recv)	incoming_queue;
 
 	/* Queues to track requests in critical states */
 	STAILQ_HEAD(, spdk_nvmf_rdma_request)	free_queue;
+#endif
 
 	STAILQ_HEAD(, spdk_nvmf_rdma_request)	pending_rdma_read_queue;
@@ -309,6 +311,7 @@ struct spdk_nvmf_rdma_qpair {
 	/* Number of requests not in the free state */
 	uint32_t				qd;
 
+#ifndef SPDK_CONFIG_RDMA_SRQ
 	/* Array of size "max_queue_depth" containing RDMA requests. */
 	struct spdk_nvmf_rdma_request		*reqs;
@@ -332,6 +335,7 @@ struct spdk_nvmf_rdma_qpair {
 	 */
 	void					*bufs;
 	struct ibv_mr				*bufs_mr;
+#endif
 
 	TAILQ_ENTRY(spdk_nvmf_rdma_qpair)	link;
@@ -363,6 +367,44 @@ struct spdk_nvmf_rdma_poller {
 	int					required_num_wr;
 	struct ibv_cq				*cq;
 
+#ifdef SPDK_CONFIG_RDMA_SRQ
+	/* The maximum number of I/O outstanding on the shared receive queue at one time */
+	uint16_t				max_srq_depth;
+
+	/* Shared receive queue */
+	struct ibv_srq				*srq;
+
+	/* Receives that are waiting for a request object */
+	STAILQ_HEAD(, spdk_nvmf_rdma_recv)	incoming_queue;
+
+	/* Queue to track free requests */
+	STAILQ_HEAD(, spdk_nvmf_rdma_request)	free_queue;
+
+	/* Array of size "max_srq_depth" containing RDMA requests. */
+	struct spdk_nvmf_rdma_request		*reqs;
+
+	/* Array of size "max_srq_depth" containing RDMA recvs. */
+	struct spdk_nvmf_rdma_recv		*recvs;
+
+	/* Array of size "max_srq_depth" containing 64 byte capsules
+	 * used for receive.
+	 */
+	union nvmf_h2c_msg			*cmds;
+	struct ibv_mr				*cmds_mr;
+
+	/* Array of size "max_srq_depth" containing 16 byte completions
+	 * to be sent back to the user.
+	 */
+	union nvmf_c2h_msg			*cpls;
+	struct ibv_mr				*cpls_mr;
+
+	/* Array of size "max_srq_depth * InCapsuleDataSize" containing
+	 * buffers to be used for in capsule data.
+	 */
+	void					*bufs;
+	struct ibv_mr				*bufs_mr;
+#endif
+
 	TAILQ_HEAD(, spdk_nvmf_rdma_qpair)	qpairs;
 
 	TAILQ_ENTRY(spdk_nvmf_rdma_poller)	link;
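
The net effect of this hunk: everything needed to service a receive — the request pool, the recv descriptors, the 64-byte command capsules, the 16-byte completions, the optional in-capsule data buffers, and their memory regions — now lives once per poller instead of once per qpair, because a single shared receive queue feeds every connection on that poller. A minimal sketch of the libibverbs call these fields are organized around (a standalone illustration, not SPDK code; `pd` is assumed to come from the usual ibv_alloc_pd() device setup):

    #include <infiniband/verbs.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Create an SRQ able to hold `depth` outstanding receive WRs. */
    static struct ibv_srq *
    create_srq(struct ibv_pd *pd, uint32_t depth, uint32_t max_sge)
    {
        struct ibv_srq_init_attr attr = {
            .attr = {
                .max_wr  = depth,   /* capacity in receive WRs */
                .max_sge = max_sge, /* scatter entries per WR */
            },
        };
        struct ibv_srq *srq = ibv_create_srq(pd, &attr);

        if (srq == NULL) {
            perror("ibv_create_srq");
        }
        return srq;
    }
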
@@ -559,6 +601,8 @@ spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair,
 	return 0;
 }
 
+#ifndef SPDK_CONFIG_RDMA_SRQ
+
 static void
 nvmf_rdma_dump_request(struct spdk_nvmf_rdma_request *req)
 {
@@ -584,15 +628,25 @@ nvmf_rdma_dump_qpair_contents(struct spdk_nvmf_rdma_qpair *rqpair)
 	}
 }
 
+#endif
+
 static void
 spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
 {
+#ifdef SPDK_CONFIG_RDMA_SRQ
+	struct spdk_nvmf_rdma_recv	*rdma_recv, *recv_tmp;
+	struct ibv_recv_wr		*bad_recv_wr = NULL;
+	int				rc;
+#endif
+
 	spdk_trace_record(TRACE_RDMA_QP_DESTROY, 0, 0, (uintptr_t)rqpair->cm_id, 0);
 
 	spdk_poller_unregister(&rqpair->destruct_poller);
 
 	if (rqpair->qd != 0) {
+#ifndef SPDK_CONFIG_RDMA_SRQ
 		nvmf_rdma_dump_qpair_contents(rqpair);
+#endif
 		SPDK_WARNLOG("Destroying qpair when queue depth is %d\n", rqpair->qd);
 	}
 
@@ -600,6 +654,7 @@ spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
 		TAILQ_REMOVE(&rqpair->poller->qpairs, rqpair, link);
 	}
 
+#ifndef SPDK_CONFIG_RDMA_SRQ
 	if (rqpair->cmds_mr) {
 		ibv_dereg_mr(rqpair->cmds_mr);
 	}
@@ -611,6 +666,18 @@ spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
 	if (rqpair->bufs_mr) {
 		ibv_dereg_mr(rqpair->bufs_mr);
 	}
+#else
+	/* Drop all received but unprocessed commands for this queue and return them to SRQ */
+	STAILQ_FOREACH_SAFE(rdma_recv, &rqpair->poller->incoming_queue, link, recv_tmp) {
+		if (rqpair == rdma_recv->qpair) {
+			STAILQ_REMOVE(&rqpair->poller->incoming_queue, rdma_recv, spdk_nvmf_rdma_recv, link);
+			rc = ibv_post_srq_recv(rqpair->poller->srq, &rdma_recv->wr, &bad_recv_wr);
+			if (rc) {
+				SPDK_ERRLOG("Unable to re-post rx descriptor\n");
+			}
+		}
+	}
+#endif
 
 	if (rqpair->cm_id) {
 		rdma_destroy_qp(rqpair->cm_id);
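
A qpair that dies with capsules still parked in the poller's incoming_queue must hand those receive slots back, otherwise the SRQ's usable depth shrinks with every disconnect; hence the drain-and-repost loop above. Note that a matching element can sit anywhere in the shared queue, which is why the loop removes the current iterator element with STAILQ_REMOVE rather than popping the head. A self-contained sketch of that removal pattern (plain sys/queue.h, nothing SPDK-specific):

    #include <sys/queue.h>
    #include <stdio.h>

    struct node {
        int value;
        STAILQ_ENTRY(node) link;
    };

    STAILQ_HEAD(node_list, node);

    int main(void)
    {
        struct node_list list = STAILQ_HEAD_INITIALIZER(list);
        struct node *n, *tmp, nodes[4] = {{1}, {2}, {3}, {4}};

        for (int i = 0; i < 4; i++) {
            STAILQ_INSERT_TAIL(&list, &nodes[i], link);
        }

        /* Remove the even elements while iterating: the _SAFE variant keeps
         * a lookahead pointer, and STAILQ_REMOVE unlinks the *current*
         * element wherever it sits (REMOVE_HEAD is only correct when the
         * element is known to be the head). */
        STAILQ_FOREACH_SAFE(n, &list, link, tmp) {
            if (n->value % 2 == 0) {
                STAILQ_REMOVE(&list, n, node, link);
            }
        }

        STAILQ_FOREACH(n, &list, link) {
            printf("%d\n", n->value); /* prints 1 then 3 */
        }
        return 0;
    }
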
@@ -622,30 +689,33 @@ spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
 	}
 
 	/* Free all memory */
+#ifndef SPDK_CONFIG_RDMA_SRQ
 	spdk_dma_free(rqpair->cmds);
 	spdk_dma_free(rqpair->cpls);
 	spdk_dma_free(rqpair->bufs);
 	free(rqpair->reqs);
 	free(rqpair->recvs);
+#endif
 	free(rqpair);
 }
 
 static int
 spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
 {
-	struct spdk_nvmf_rdma_transport	*rtransport;
 	struct spdk_nvmf_rdma_qpair	*rqpair;
 	struct spdk_nvmf_rdma_poller	*rpoller;
-	int				rc, i, num_cqe, required_num_wr;;
+	int				rc, num_cqe, required_num_wr;
+#ifndef SPDK_CONFIG_RDMA_SRQ
+	int				i;
+	struct spdk_nvmf_rdma_recv	*rdma_recv;
+	struct spdk_nvmf_rdma_request	*rdma_req;
+	struct spdk_nvmf_rdma_transport	*rtransport;
+	struct spdk_nvmf_transport	*transport;
+#endif
 	struct spdk_nvmf_rdma_device	*device;
 	struct ibv_qp_init_attr		ibv_init_attr;
 
 	rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
-	rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
-	transport = &rtransport->transport;
 	device = rqpair->port->device;
 
 	memset(&ibv_init_attr, 0, sizeof(struct ibv_qp_init_attr));
@@ -653,10 +723,15 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
 	ibv_init_attr.qp_type		= IBV_QPT_RC;
 	ibv_init_attr.send_cq		= rqpair->poller->cq;
 	ibv_init_attr.recv_cq		= rqpair->poller->cq;
+#ifdef SPDK_CONFIG_RDMA_SRQ
+	ibv_init_attr.srq		= rqpair->poller->srq;
+#endif
 	ibv_init_attr.cap.max_send_wr	= rqpair->max_queue_depth *
 					  2 + 1; /* SEND, READ, and WRITE operations + dummy drain WR */
+#ifndef SPDK_CONFIG_RDMA_SRQ
 	ibv_init_attr.cap.max_recv_wr	= rqpair->max_queue_depth +
 					  1; /* RECV operations + dummy drain WR */
+#endif
 	ibv_init_attr.cap.max_send_sge	= spdk_min(device->attr.max_sge, NVMF_DEFAULT_TX_SGE);
 	ibv_init_attr.cap.max_recv_sge	= spdk_min(device->attr.max_sge, NVMF_DEFAULT_RX_SGE);
 
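
When ibv_qp_init_attr.srq is non-NULL the queue pair is created without a private receive queue, which is why cap.max_recv_wr is deliberately left unset in the SRQ build: receives can then only be posted to the SRQ. A sketch of the same attribute handling in isolation (SPDK itself goes through rdma_create_qp() on the cm_id rather than raw ibv_create_qp(), but the attribute struct is the same):

    #include <infiniband/verbs.h>
    #include <stdint.h>

    /* Create an RC QP whose receive side is backed by an SRQ.
     * cq and srq are assumed to already exist (see the SRQ sketch above). */
    static struct ibv_qp *
    create_qp_with_srq(struct ibv_pd *pd, struct ibv_cq *cq,
                       struct ibv_srq *srq, uint32_t send_depth)
    {
        struct ibv_qp_init_attr attr = {
            .qp_type = IBV_QPT_RC,
            .send_cq = cq,
            .recv_cq = cq,
            .srq     = srq,  /* receive WRs come from the SRQ... */
            .cap     = {
                .max_send_wr  = send_depth,
                .max_send_sge = 1,
                /* ...so max_recv_wr / max_recv_sge stay zero */
            },
        };

        return ibv_create_qp(pd, &attr);
    }
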
@@ -710,6 +785,10 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
 	spdk_trace_record(TRACE_RDMA_QP_CREATE, 0, 0, (uintptr_t)rqpair->cm_id, 0);
 	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "New RDMA Connection: %p\n", qpair);
 
+#ifndef SPDK_CONFIG_RDMA_SRQ
+	rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
+	transport = &rtransport->transport;
+
 	rqpair->reqs = calloc(rqpair->max_queue_depth, sizeof(*rqpair->reqs));
 	rqpair->recvs = calloc(rqpair->max_queue_depth, sizeof(*rqpair->recvs));
 	rqpair->cmds = spdk_dma_zmalloc(rqpair->max_queue_depth * sizeof(*rqpair->cmds),
@@ -760,12 +839,15 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
 			      rqpair->bufs, rqpair->max_queue_depth *
 			      transport->opts.in_capsule_data_size, rqpair->bufs_mr->lkey);
 	}
+#endif
 
-	STAILQ_INIT(&rqpair->free_queue);
-	rqpair->current_recv_depth = rqpair->max_queue_depth;
 	STAILQ_INIT(&rqpair->pending_rdma_read_queue);
 	STAILQ_INIT(&rqpair->pending_rdma_write_queue);
 
+	rqpair->current_recv_depth = rqpair->max_queue_depth;
+#ifndef SPDK_CONFIG_RDMA_SRQ
+	STAILQ_INIT(&rqpair->free_queue);
+
 	for (i = 0; i < rqpair->max_queue_depth; i++) {
 		struct ibv_recv_wr *bad_wr = NULL;
@@ -839,6 +921,7 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
 		rdma_req->state = RDMA_REQUEST_STATE_FREE;
 		STAILQ_INSERT_HEAD(&rqpair->free_queue, rdma_req, state_link);
 	}
+#endif
 
 	return 0;
 }
@@ -900,7 +983,11 @@ request_transfer_out(struct spdk_nvmf_request *req, int *data_posted)
 	assert(rdma_req->recv != NULL);
 	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA RECV POSTED. Recv: %p Connection: %p\n", rdma_req->recv,
 		      rqpair);
+#ifndef SPDK_CONFIG_RDMA_SRQ
 	rc = ibv_post_recv(rqpair->cm_id->qp, &rdma_req->recv->wr, &bad_recv_wr);
+#else
+	rc = ibv_post_srq_recv(rqpair->poller->srq, &rdma_req->recv->wr, &bad_recv_wr);
+#endif
 	if (rc) {
 		SPDK_ERRLOG("Unable to re-post rx descriptor\n");
 		return rc;
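
The replacement receive always returns to wherever receives are owned: the per-connection QP in the classic build, the poller's SRQ in the SRQ build. Both verbs take the same ibv_recv_wr chain, so the difference can be confined to a single call site, as in this small sketch (an illustrative helper, not part of the patch):

    #include <infiniband/verbs.h>
    #include <stddef.h>

    /* Re-arm one receive after its capsule has been consumed. Whether the
     * WR is posted to the QP or to the SRQ is the only difference between
     * the two build configurations. */
    static int
    repost_recv(struct ibv_qp *qp, struct ibv_srq *srq, struct ibv_recv_wr *wr)
    {
        struct ibv_recv_wr *bad_wr = NULL;

        if (srq != NULL) {
            return ibv_post_srq_recv(srq, wr, &bad_wr);
        }
        return ibv_post_recv(qp, wr, &bad_wr);
    }
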
@@ -1066,7 +1153,9 @@ nvmf_rdma_connect(struct spdk_nvmf_transport *transport, struct rdma_cm_event *e
 	rqpair->cm_id = event->id;
 	rqpair->listen_id = event->listen_id;
 	rqpair->qpair.transport = transport;
+#ifndef SPDK_CONFIG_RDMA_SRQ
 	STAILQ_INIT(&rqpair->incoming_queue);
+#endif
 	event->id->context = &rqpair->qpair;
 
 	cb_fn(&rqpair->qpair);
@@ -1399,7 +1488,11 @@ nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req,
 	rdma_req->req.iovcnt = 0;
 	rdma_req->req.data = NULL;
 	rqpair->qd--;
+#ifndef SPDK_CONFIG_RDMA_SRQ
 	STAILQ_INSERT_HEAD(&rqpair->free_queue, rdma_req, state_link);
+#else
+	STAILQ_INSERT_HEAD(&rqpair->poller->free_queue, rdma_req, state_link);
+#endif
 	rdma_req->state = RDMA_REQUEST_STATE_FREE;
 }
 
@@ -1631,6 +1724,7 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
 #define SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH 128
 #define SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH 128
+#define SPDK_NVMF_RDMA_DEFAULT_SRQ_DEPTH 4096
 #define SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR 64
 #define SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE 4096
 #define SPDK_NVMF_RDMA_DEFAULT_MAX_IO_SIZE 131072
@@ -1649,6 +1743,7 @@ spdk_nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts)
 	opts->max_aq_depth = SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH;
 	opts->num_shared_buffers = SPDK_NVMF_RDMA_DEFAULT_NUM_SHARED_BUFFERS;
 	opts->buf_cache_size = SPDK_NVMF_RDMA_DEFAULT_BUFFER_CACHE_SIZE;
+	opts->max_srq_depth = SPDK_NVMF_RDMA_DEFAULT_SRQ_DEPTH;
 }
 
 const struct spdk_mem_map_ops g_nvmf_rdma_map_ops = {
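
The default is deliberately decoupled from the per-queue depth: with per-qpair pools, receive-slot memory scales as max_queue_depth times the number of active qpairs (at the defaults above, 128 * 64 = 8192 slots for one fully populated controller), while a poller-wide SRQ caps it at a flat max_srq_depth of 4096 slots per poller regardless of how many connections attach.
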
@@ -1691,15 +1786,16 @@ spdk_nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts)
 	SPDK_INFOLOG(SPDK_LOG_RDMA, "*** RDMA Transport Init ***\n"
 		     "  Transport opts:  max_ioq_depth=%d, max_io_size=%d,\n"
 		     "  max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
-		     "  in_capsule_data_size=%d, max_aq_depth=%d\n"
-		     "  num_shared_buffers=%d\n",
+		     "  in_capsule_data_size=%d, max_aq_depth=%d,\n"
+		     "  num_shared_buffers=%d, max_srq_depth=%d\n",
 		     opts->max_queue_depth,
 		     opts->max_io_size,
 		     opts->max_qpairs_per_ctrlr,
 		     opts->io_unit_size,
 		     opts->in_capsule_data_size,
 		     opts->max_aq_depth,
-		     opts->num_shared_buffers);
+		     opts->num_shared_buffers,
+		     opts->max_srq_depth);
 
 	/* I/O unit size cannot be larger than max I/O size */
 	if (opts->io_unit_size > opts->max_io_size) {
@@ -2126,6 +2222,9 @@ spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport
 				     struct spdk_nvmf_rdma_qpair *rqpair, bool drain)
 {
 	struct spdk_nvmf_rdma_request	*rdma_req, *req_tmp;
+#ifdef SPDK_CONFIG_RDMA_SRQ
+	struct spdk_nvmf_rdma_poller	*rpoller = rqpair->poller;
+#endif
 
 	/* We process I/O in the data transfer pending queue at the highest priority. RDMA reads first */
 	STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_read_queue, state_link, req_tmp) {
@@ -2149,13 +2248,20 @@ spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport
 		}
 	}
 
+#ifndef SPDK_CONFIG_RDMA_SRQ
 	while (!STAILQ_EMPTY(&rqpair->free_queue) && !STAILQ_EMPTY(&rqpair->incoming_queue)) {
 		rdma_req = STAILQ_FIRST(&rqpair->free_queue);
 		STAILQ_REMOVE_HEAD(&rqpair->free_queue, state_link);
 		rdma_req->recv = STAILQ_FIRST(&rqpair->incoming_queue);
 		STAILQ_REMOVE_HEAD(&rqpair->incoming_queue, link);
+#else
+	while (!STAILQ_EMPTY(&rpoller->free_queue) && !STAILQ_EMPTY(&rpoller->incoming_queue)) {
+		rdma_req = STAILQ_FIRST(&rpoller->free_queue);
+		STAILQ_REMOVE_HEAD(&rpoller->free_queue, state_link);
+		rdma_req->recv = STAILQ_FIRST(&rpoller->incoming_queue);
+		STAILQ_REMOVE_HEAD(&rpoller->incoming_queue, link);
+		rdma_req->req.qpair = &rdma_req->recv->qpair->qpair;
+#endif
 		rqpair->qd++;
 		rdma_req->state = RDMA_REQUEST_STATE_NEW;
 		if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
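
Because the SRQ build pairs requests and capsules out of poller-wide pools rather than per-connection ones, a freshly popped request no longer knows which connection it serves; the extra rdma_req->req.qpair assignment in the #else branch restores that link from the qpair recorded on the recv when its completion was matched by qp_num (see get_rdma_qpair_from_wc() further down).
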
@@ -2467,13 +2573,22 @@ spdk_nvmf_rdma_discover(struct spdk_nvmf_transport *transport,
 	entry->tsas.rdma.rdma_cms = SPDK_NVMF_RDMA_CMS_RDMA_CM;
 }
 
+static void
+spdk_nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group);
+
 static struct spdk_nvmf_transport_poll_group *
 spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport)
 {
 	struct spdk_nvmf_rdma_transport		*rtransport;
 	struct spdk_nvmf_rdma_poll_group	*rgroup;
-	struct spdk_nvmf_rdma_poller		*poller, *tpoller;
+	struct spdk_nvmf_rdma_poller		*poller;
 	struct spdk_nvmf_rdma_device		*device;
+#ifdef SPDK_CONFIG_RDMA_SRQ
+	int					i, rc;
+	struct ibv_srq_init_attr		srq_init_attr;
+	struct spdk_nvmf_rdma_recv		*rdma_recv;
+	struct spdk_nvmf_rdma_request		*rdma_req;
+#endif
 
 	rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
@@ -2490,7 +2605,9 @@ spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport)
 		poller = calloc(1, sizeof(*poller));
 		if (!poller) {
 			SPDK_ERRLOG("Unable to allocate memory for new RDMA poller\n");
-			goto err_exit;
+			spdk_nvmf_rdma_poll_group_destroy(&rgroup->group);
+			pthread_mutex_unlock(&rtransport->lock);
+			return NULL;
 		}
 
 		poller->device = device;
@@ -2501,29 +2618,166 @@ spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport)
 		poller->cq = ibv_create_cq(device->context, DEFAULT_NVMF_RDMA_CQ_SIZE, poller, NULL, 0);
 		if (!poller->cq) {
 			SPDK_ERRLOG("Unable to create completion queue\n");
 			free(poller);
-			goto err_exit;
+			spdk_nvmf_rdma_poll_group_destroy(&rgroup->group);
+			pthread_mutex_unlock(&rtransport->lock);
+			return NULL;
 		}
 		poller->num_cqe = DEFAULT_NVMF_RDMA_CQ_SIZE;
 
 		TAILQ_INSERT_TAIL(&rgroup->pollers, poller, link);
 
+#ifdef SPDK_CONFIG_RDMA_SRQ
+		poller->max_srq_depth = transport->opts.max_srq_depth;
+
+		memset(&srq_init_attr, 0, sizeof(struct ibv_srq_init_attr));
+		srq_init_attr.attr.max_wr = poller->max_srq_depth;
+		srq_init_attr.attr.max_sge = spdk_min(device->attr.max_sge, NVMF_DEFAULT_RX_SGE);
+		poller->srq = ibv_create_srq(device->pd, &srq_init_attr);
+		if (!poller->srq) {
+			SPDK_ERRLOG("Unable to create shared receive queue, errno %d\n", errno);
+			spdk_nvmf_rdma_poll_group_destroy(&rgroup->group);
+			pthread_mutex_unlock(&rtransport->lock);
+			return NULL;
+		}
+		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Created RDMA SRQ %p: max_wr %u, max_sge %u, srq_limit %u\n",
+			      poller->srq,
+			      srq_init_attr.attr.max_wr,
+			      srq_init_attr.attr.max_sge,
+			      srq_init_attr.attr.srq_limit);
+
+		poller->reqs = calloc(poller->max_srq_depth, sizeof(*poller->reqs));
+		poller->recvs = calloc(poller->max_srq_depth, sizeof(*poller->recvs));
+		poller->cmds = spdk_dma_zmalloc(poller->max_srq_depth * sizeof(*poller->cmds),
+						0x1000, NULL);
+		poller->cpls = spdk_dma_zmalloc(poller->max_srq_depth * sizeof(*poller->cpls),
+						0x1000, NULL);
+
+		if (transport->opts.in_capsule_data_size > 0) {
+			poller->bufs = spdk_dma_zmalloc(poller->max_srq_depth *
+							transport->opts.in_capsule_data_size,
+							0x1000, NULL);
+		}
+
+		if (!poller->reqs || !poller->recvs || !poller->cmds ||
+		    !poller->cpls || (transport->opts.in_capsule_data_size && !poller->bufs)) {
+			SPDK_ERRLOG("Unable to allocate sufficient memory for RDMA shared receive queue.\n");
+			spdk_nvmf_rdma_poll_group_destroy(&rgroup->group);
+			pthread_mutex_unlock(&rtransport->lock);
+			return NULL;
+		}
+
+		poller->cmds_mr = ibv_reg_mr(device->pd, poller->cmds,
+					     poller->max_srq_depth * sizeof(*poller->cmds),
+					     IBV_ACCESS_LOCAL_WRITE);
+		poller->cpls_mr = ibv_reg_mr(device->pd, poller->cpls,
+					     poller->max_srq_depth * sizeof(*poller->cpls),
+					     0);
+
+		if (transport->opts.in_capsule_data_size) {
+			poller->bufs_mr = ibv_reg_mr(device->pd, poller->bufs,
+						     poller->max_srq_depth *
+						     transport->opts.in_capsule_data_size,
+						     IBV_ACCESS_LOCAL_WRITE |
+						     IBV_ACCESS_REMOTE_WRITE);
+		}
+
+		if (!poller->cmds_mr || !poller->cpls_mr || (transport->opts.in_capsule_data_size &&
+				!poller->bufs_mr)) {
+			SPDK_ERRLOG("Unable to register required memory for RDMA shared receive queue.\n");
+			spdk_nvmf_rdma_poll_group_destroy(&rgroup->group);
+			pthread_mutex_unlock(&rtransport->lock);
+			return NULL;
+		}
+		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Command Array: %p Length: %lx LKey: %x\n",
+			      poller->cmds, poller->max_srq_depth * sizeof(*poller->cmds), poller->cmds_mr->lkey);
+		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Completion Array: %p Length: %lx LKey: %x\n",
+			      poller->cpls, poller->max_srq_depth * sizeof(*poller->cpls), poller->cpls_mr->lkey);
+		if (poller->bufs && poller->bufs_mr) {
+			SPDK_DEBUGLOG(SPDK_LOG_RDMA, "In Capsule Data Array: %p Length: %x LKey: %x\n",
+				      poller->bufs, poller->max_srq_depth *
+				      transport->opts.in_capsule_data_size, poller->bufs_mr->lkey);
+		}
+
+		/* Initialize queues */
+		STAILQ_INIT(&poller->incoming_queue);
+		STAILQ_INIT(&poller->free_queue);
+
+		for (i = 0; i < poller->max_srq_depth; i++) {
+			struct ibv_recv_wr *bad_wr = NULL;
+
+			rdma_recv = &poller->recvs[i];
+			rdma_recv->qpair = NULL;
+
+			/* Set up memory to receive commands */
+			if (poller->bufs) {
+				rdma_recv->buf = (void *)((uintptr_t)poller->bufs + (i *
+						 transport->opts.in_capsule_data_size));
+			}
+
+			rdma_recv->rdma_wr.type = RDMA_WR_TYPE_RECV;
+
+			rdma_recv->sgl[0].addr = (uintptr_t)&poller->cmds[i];
+			rdma_recv->sgl[0].length = sizeof(poller->cmds[i]);
+			rdma_recv->sgl[0].lkey = poller->cmds_mr->lkey;
+			rdma_recv->wr.num_sge = 1;
+
+			if (rdma_recv->buf && poller->bufs_mr) {
+				rdma_recv->sgl[1].addr = (uintptr_t)rdma_recv->buf;
+				rdma_recv->sgl[1].length = transport->opts.in_capsule_data_size;
+				rdma_recv->sgl[1].lkey = poller->bufs_mr->lkey;
+				rdma_recv->wr.num_sge++;
+			}
+
+			rdma_recv->wr.wr_id = (uintptr_t)&rdma_recv->rdma_wr;
+			rdma_recv->wr.sg_list = rdma_recv->sgl;
+
+			rc = ibv_post_srq_recv(poller->srq, &rdma_recv->wr, &bad_wr);
+			if (rc) {
+				SPDK_ERRLOG("Unable to post capsule for RDMA RECV\n");
+				spdk_nvmf_rdma_poll_group_destroy(&rgroup->group);
+				pthread_mutex_unlock(&rtransport->lock);
+				return NULL;
+			}
+		}
+
+		for (i = 0; i < poller->max_srq_depth; i++) {
+			rdma_req = &poller->reqs[i];
+
+			rdma_req->req.qpair = NULL;
+			rdma_req->req.cmd = NULL;
+
+			/* Set up memory to send responses */
+			rdma_req->req.rsp = &poller->cpls[i];
+
+			rdma_req->rsp.sgl[0].addr = (uintptr_t)&poller->cpls[i];
+			rdma_req->rsp.sgl[0].length = sizeof(poller->cpls[i]);
+			rdma_req->rsp.sgl[0].lkey = poller->cpls_mr->lkey;
+
+			rdma_req->rsp.rdma_wr.type = RDMA_WR_TYPE_SEND;
+			rdma_req->rsp.wr.wr_id = (uintptr_t)&rdma_req->rsp.rdma_wr;
+			rdma_req->rsp.wr.next = NULL;
+			rdma_req->rsp.wr.opcode = IBV_WR_SEND;
+			rdma_req->rsp.wr.send_flags = IBV_SEND_SIGNALED;
+			rdma_req->rsp.wr.sg_list = rdma_req->rsp.sgl;
+			rdma_req->rsp.wr.num_sge = SPDK_COUNTOF(rdma_req->rsp.sgl);
+
+			/* Set up memory for data buffers */
+			rdma_req->data.rdma_wr.type = RDMA_WR_TYPE_DATA;
+			rdma_req->data.wr.wr_id = (uintptr_t)&rdma_req->data.rdma_wr;
+			rdma_req->data.wr.next = NULL;
+			rdma_req->data.wr.send_flags = IBV_SEND_SIGNALED;
+			rdma_req->data.wr.sg_list = rdma_req->data.sgl;
+			rdma_req->data.wr.num_sge = SPDK_COUNTOF(rdma_req->data.sgl);
+
+			/* Initialize request state to FREE */
+			rdma_req->state = RDMA_REQUEST_STATE_FREE;
+			STAILQ_INSERT_TAIL(&poller->free_queue, rdma_req, state_link);
+		}
+#endif
 	}
 
 	pthread_mutex_unlock(&rtransport->lock);
 	return &rgroup->group;
-
-err_exit:
-	TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tpoller) {
-		TAILQ_REMOVE(&rgroup->pollers, poller, link);
-		if (poller->cq) {
-			ibv_destroy_cq(poller->cq);
-		}
-		free(poller);
-	}
-
-	free(rgroup);
-	pthread_mutex_unlock(&rtransport->lock);
-	return NULL;
 }
 
 static void
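
The setup order above is the part worth internalizing: create the SRQ, allocate the flat arrays, register them with ibv_reg_mr(), then pre-post exactly max_srq_depth receives, each carrying a one- or two-entry SGL (the command capsule, plus an in-capsule data buffer when configured). A compressed, self-contained sketch of that allocate/register/pre-post sequence (a standalone illustration with invented names; real error handling and teardown omitted):

    #include <infiniband/verbs.h>
    #include <stdint.h>
    #include <stdlib.h>

    /* Pre-post `depth` receives to an SRQ, one 64-byte capsule slot each.
     * pd and srq are assumed to exist already; allocations are leaked on
     * error to keep the sketch short. */
    static int
    prepost_srq_slots(struct ibv_pd *pd, struct ibv_srq *srq, uint32_t depth)
    {
        const size_t slot_sz = 64;      /* NVMe-oF command capsule size */
        void *slab = calloc(depth, slot_sz);
        struct ibv_sge *sgls = calloc(depth, sizeof(*sgls));
        struct ibv_recv_wr *wrs = calloc(depth, sizeof(*wrs));
        struct ibv_mr *mr;

        if (slab == NULL || sgls == NULL || wrs == NULL) {
            return -1;
        }

        mr = ibv_reg_mr(pd, slab, depth * slot_sz, IBV_ACCESS_LOCAL_WRITE);
        if (mr == NULL) {
            return -1;
        }

        for (uint32_t i = 0; i < depth; i++) {
            struct ibv_recv_wr *bad_wr = NULL;

            sgls[i].addr   = (uintptr_t)slab + i * slot_sz;
            sgls[i].length = slot_sz;
            sgls[i].lkey   = mr->lkey;

            wrs[i].wr_id   = i;         /* real code stores a context pointer */
            wrs[i].sg_list = &sgls[i];
            wrs[i].num_sge = 1;         /* .next is NULL from calloc */

            if (ibv_post_srq_recv(srq, &wrs[i], &bad_wr)) {
                return -1;
            }
        }
        return 0;
    }
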
@@ -2542,6 +2796,32 @@ spdk_nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
 	TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) {
 		TAILQ_REMOVE(&rgroup->pollers, poller, link);
 
+#ifdef SPDK_CONFIG_RDMA_SRQ
+		if (poller->cmds_mr) {
+			ibv_dereg_mr(poller->cmds_mr);
+		}
+
+		if (poller->cpls_mr) {
+			ibv_dereg_mr(poller->cpls_mr);
+		}
+
+		if (poller->bufs_mr) {
+			ibv_dereg_mr(poller->bufs_mr);
+		}
+
+		if (poller->srq) {
+			ibv_destroy_srq(poller->srq);
+			SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Destroyed RDMA shared queue %p\n", poller->srq);
+		}
+
+		/* Free all memory */
+		spdk_dma_free(poller->cmds);
+		spdk_dma_free(poller->cpls);
+		spdk_dma_free(poller->bufs);
+		free(poller->reqs);
+		free(poller->recvs);
+#endif
+
 		if (poller->cq) {
 			ibv_destroy_cq(poller->cq);
 		}
@@ -2689,6 +2969,22 @@ spdk_nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair)
 					   NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US);
 }
 
+#ifdef SPDK_CONFIG_RDMA_SRQ
+static struct spdk_nvmf_rdma_qpair *
+get_rdma_qpair_from_wc(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_wc *wc)
+{
+	struct spdk_nvmf_rdma_qpair *rqpair;
+
+	/* @todo: improve QP search */
+	TAILQ_FOREACH(rqpair, &rpoller->qpairs, link) {
+		if (wc->qp_num == rqpair->cm_id->qp->qp_num) {
+			return rqpair;
+		}
+	}
+	SPDK_ERRLOG("Didn't find QP with qp_num %u\n", wc->qp_num);
+	return NULL;
+}
+#endif
+
 #ifdef DEBUG
 static int
 spdk_nvmf_rdma_req_is_completing(struct spdk_nvmf_rdma_request *rdma_req)
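
This helper is the price of sharing one receive queue: a RECV completion no longer implies an owner, so the connection must be recovered from wc->qp_num, and the patch does it with a linear walk over the poller's qpairs (flagged by the @todo). One obvious O(1) direction, sketched here purely as a hypothetical improvement and not as anything the patch or SPDK provides:

    #include <stdint.h>
    #include <stddef.h>

    /* Hypothetical O(1) replacement for the linear @todo search above:
     * a small open-addressed table keyed by qp_num, populated when a
     * qpair attaches to the poller. Illustrative names only. */
    #define QP_TABLE_SIZE 1024  /* power of two, > max qpairs per poller */

    struct qp_entry {
        uint32_t qp_num;
        void    *qpair; /* struct spdk_nvmf_rdma_qpair * in real code */
    };

    static void *
    qp_table_find(const struct qp_entry *tbl, uint32_t qp_num)
    {
        for (size_t i = 0; i < QP_TABLE_SIZE; i++) {
            size_t slot = (qp_num + i) & (QP_TABLE_SIZE - 1);

            if (tbl[slot].qpair == NULL) {
                return NULL;            /* empty slot: not present */
            }
            if (tbl[slot].qp_num == qp_num) {
                return tbl[slot].qpair; /* hit */
            }
        }
        return NULL;
    }
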
@@ -2746,14 +3042,20 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
 				break;
 			case RDMA_WR_TYPE_RECV:
 				rdma_recv = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr);
+#ifdef SPDK_CONFIG_RDMA_SRQ
+				rdma_recv->qpair = get_rdma_qpair_from_wc(rpoller, &wc[i]);
+				assert(rdma_recv->qpair != NULL);
+#endif
 				rqpair = rdma_recv->qpair;
 				/* Dump this into the incoming queue. This gets cleaned up when
 				 * the queue pair disconnects or recovers. */
+#ifndef SPDK_CONFIG_RDMA_SRQ
 				STAILQ_INSERT_TAIL(&rqpair->incoming_queue, rdma_recv, link);
+#else
+				STAILQ_INSERT_TAIL(&rpoller->incoming_queue, rdma_recv, link);
+#endif
 				rqpair->current_recv_depth++;
 				/* Don't worry about responding to recv overflow, we are disconnecting anyways */
 				break;
 			case RDMA_WR_TYPE_DATA:
 				/* If the data transfer fails still force the queue into the error state,
@@ -2841,6 +3143,10 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
 		case IBV_WC_RECV:
 			assert(rdma_wr->type == RDMA_WR_TYPE_RECV);
 			rdma_recv = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr);
+#ifdef SPDK_CONFIG_RDMA_SRQ
+			rdma_recv->qpair = get_rdma_qpair_from_wc(rpoller, &wc[i]);
+			assert(rdma_recv->qpair != NULL);
+#endif
 			rqpair = rdma_recv->qpair;
 			/* The qpair should not send more requests than are allowed per qpair. */
 			if (rqpair->current_recv_depth >= rqpair->max_queue_depth) {
@@ -2848,7 +3154,12 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
 			} else {
 				rqpair->current_recv_depth++;
 			}
+
+#ifndef SPDK_CONFIG_RDMA_SRQ
 			STAILQ_INSERT_TAIL(&rqpair->incoming_queue, rdma_recv, link);
+#else
+			STAILQ_INSERT_TAIL(&rpoller->incoming_queue, rdma_recv, link);
+#endif
 			/* Try to process other queued requests */
 			spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair, false);
 			break;