nvmf/rdma: recover qp from fatal errors
Recovery of the RDMA QP is attempted after an IBV_EVENT_QP_FATAL event is received from the IBV asynchronous event API. The RDMA QP is put into the ERROR state and stops processing inbound requests. Outstanding requests are only allowed to move to the COMPLETED and FREE states; no outbound transfers are performed.

An IBV_EVENT_QP_LAST_WQE_REACHED or IBV_EVENT_SQ_DRAINED event is expected to follow IBV_EVENT_QP_FATAL, signaling that all outstanding requests can be drained and their associated resources freed. Requests being executed by the block layer are allowed to complete gracefully, but no outbound transfers are made. Note that outstanding requests cannot be reliably completed by polling the CQ, because WCs with a failure status may not have all fields valid. Failed WCs are therefore dropped, and the outstanding requests are fetched from the linked list of the appropriate state.

QP recovery is triggered once there are no more outstanding requests. If recovery completes successfully, the RDMA QP is put back into the ACTIVE state; otherwise a QP disconnect is triggered.

Change-Id: I45ee7feea067f80ccc6402518990014d691fbda3
Signed-off-by: Philipp Skadorov <philipp.skadorov@wdc.com>
Reviewed-on: https://review.gerrithub.io/416879
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
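For orientation before reading the diff, the following is a minimal sketch of the IBV state ladder the commit message describes: an errored QP is walked back through RESET, INIT, RTR and RTS with ibv_modify_qp(), reusing attributes that were captured earlier with ibv_query_qp(). The recover_qp() helper and its parameters are illustrative only, not SPDK API; the actual implementation is spdk_nvmf_rdma_recover() and spdk_nvmf_rdma_set_ibv_state() in lib/nvmf/rdma.c below.

#include <stddef.h>
#include <infiniband/verbs.h>

/* 'attr' is assumed to have been filled by an earlier ibv_query_qp() call
 * (PKEY index, port, AV, path MTU, PSNs, ...) while the connection was
 * still healthy, which is what the patch does at QP creation time. */
static int
recover_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr)
{
	/* Attribute mask required for each target state of an RC QP. */
	static const int masks[] = {
		[IBV_QPS_RESET] = IBV_QP_STATE,
		[IBV_QPS_INIT]  = IBV_QP_STATE | IBV_QP_PKEY_INDEX |
				  IBV_QP_PORT | IBV_QP_ACCESS_FLAGS,
		[IBV_QPS_RTR]   = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
				  IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
				  IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER,
		[IBV_QPS_RTS]   = IBV_QP_STATE | IBV_QP_SQ_PSN | IBV_QP_TIMEOUT |
				  IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY |
				  IBV_QP_MAX_QP_RD_ATOMIC,
	};
	static const enum ibv_qp_state ladder[] = {
		IBV_QPS_RESET, IBV_QPS_INIT, IBV_QPS_RTR, IBV_QPS_RTS
	};
	size_t i;

	/* Only a QP that went fatal (ERR) is recovered. */
	if (attr->qp_state != IBV_QPS_ERR) {
		return -1;
	}

	for (i = 0; i < sizeof(ladder) / sizeof(ladder[0]); i++) {
		attr->qp_state = ladder[i];
		if (ibv_modify_qp(qp, attr, masks[ladder[i]])) {
			return -1;	/* caller disconnects the qpair */
		}
	}
	return 0;	/* QP is back in RTS; request processing may resume */
}

In the patch this ladder only runs after all outstanding requests have been drained; on any failure the qpair is left in the ERROR state and disconnected.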
This commit is contained in:
parent e47f972dff
commit 4bfb557d80
@@ -1727,3 +1727,14 @@ spdk_nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr)
 
 	return 0;
 }
+
+void
+spdk_nvmf_ctrlr_drain_aer_req(struct spdk_nvmf_ctrlr *ctrlr)
+{
+	if (!ctrlr->aer_req) {
+		return;
+	}
+
+	spdk_nvmf_request_complete(ctrlr->aer_req);
+	ctrlr->aer_req = NULL;
+}
@@ -60,6 +60,7 @@ enum spdk_nvmf_qpair_state {
 	SPDK_NVMF_QPAIR_ACTIVATING,
 	SPDK_NVMF_QPAIR_ACTIVE,
 	SPDK_NVMF_QPAIR_DEACTIVATING,
+	SPDK_NVMF_QPAIR_ERROR,
 };
 
 typedef void (*spdk_nvmf_state_change_done)(void *cb_arg, int status);
@@ -291,6 +292,8 @@ struct spdk_nvmf_ctrlr *spdk_nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem
 		uint16_t cntlid);
 int spdk_nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr);
+
+void spdk_nvmf_ctrlr_drain_aer_req(struct spdk_nvmf_ctrlr *ctrlr);
 
 static inline struct spdk_nvmf_ns *
 _spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
 {
lib/nvmf/rdma.c (498 changes)
@@ -259,6 +259,12 @@ struct spdk_nvmf_rdma_qpair {
 	/* Mgmt channel */
 	struct spdk_io_channel *mgmt_channel;
 	struct spdk_nvmf_rdma_mgmt_channel *ch;
+
+	/* IBV queue pair attributes: they are used to manage
+	 * qp state and recover from errors.
+	 */
+	struct ibv_qp_init_attr ibv_init_attr;
+	struct ibv_qp_attr ibv_attr;
 };
 
 struct spdk_nvmf_rdma_poller {
@@ -324,6 +330,125 @@ struct spdk_nvmf_rdma_mgmt_channel {
 	TAILQ_HEAD(, spdk_nvmf_rdma_request) pending_data_buf_queue;
 };
 
+/* API to IBV QueuePair */
+static const char *str_ibv_qp_state[] = {
+	"IBV_QPS_RESET",
+	"IBV_QPS_INIT",
+	"IBV_QPS_RTR",
+	"IBV_QPS_RTS",
+	"IBV_QPS_SQD",
+	"IBV_QPS_SQE",
+	"IBV_QPS_ERR"
+};
+
+static enum ibv_qp_state
+spdk_nvmf_rdma_get_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair) {
+	return rqpair->ibv_attr.qp_state;
+}
+
+static int
+spdk_nvmf_rdma_update_ibv_qp(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	int rc;
+	/* All the attributes needed for recovery */
+	static int spdk_nvmf_ibv_attr_mask =
+		IBV_QP_STATE |
+		IBV_QP_PKEY_INDEX |
+		IBV_QP_PORT |
+		IBV_QP_ACCESS_FLAGS |
+		IBV_QP_AV |
+		IBV_QP_PATH_MTU |
+		IBV_QP_DEST_QPN |
+		IBV_QP_RQ_PSN |
+		IBV_QP_MAX_DEST_RD_ATOMIC |
+		IBV_QP_MIN_RNR_TIMER |
+		IBV_QP_SQ_PSN |
+		IBV_QP_TIMEOUT |
+		IBV_QP_RETRY_CNT |
+		IBV_QP_RNR_RETRY |
+		IBV_QP_MAX_QP_RD_ATOMIC;
+
+	rc = ibv_query_qp(rqpair->cm_id->qp, &rqpair->ibv_attr,
+			  spdk_nvmf_ibv_attr_mask, &rqpair->ibv_init_attr);
+	assert(!rc);
+	return rc;
+}
+
+static int
+spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair,
+			     enum ibv_qp_state new_state)
+{
+	int rc;
+	enum ibv_qp_state state;
+	static int attr_mask_rc[] = {
+		[IBV_QPS_RESET] = IBV_QP_STATE,
+		[IBV_QPS_INIT] = (IBV_QP_STATE |
+				  IBV_QP_PKEY_INDEX |
+				  IBV_QP_PORT |
+				  IBV_QP_ACCESS_FLAGS),
+		[IBV_QPS_RTR] = (IBV_QP_STATE |
+				 IBV_QP_AV |
+				 IBV_QP_PATH_MTU |
+				 IBV_QP_DEST_QPN |
+				 IBV_QP_RQ_PSN |
+				 IBV_QP_MAX_DEST_RD_ATOMIC |
+				 IBV_QP_MIN_RNR_TIMER),
+		[IBV_QPS_RTS] = (IBV_QP_STATE |
+				 IBV_QP_SQ_PSN |
+				 IBV_QP_TIMEOUT |
+				 IBV_QP_RETRY_CNT |
+				 IBV_QP_RNR_RETRY |
+				 IBV_QP_MAX_QP_RD_ATOMIC),
+		[IBV_QPS_SQD] = IBV_QP_STATE,
+		[IBV_QPS_SQE] = IBV_QP_STATE,
+		[IBV_QPS_ERR] = IBV_QP_STATE,
+	};
+
+	switch (new_state) {
+	case IBV_QPS_RESET:
+	case IBV_QPS_INIT:
+	case IBV_QPS_RTR:
+	case IBV_QPS_RTS:
+	case IBV_QPS_SQD:
+	case IBV_QPS_SQE:
+	case IBV_QPS_ERR:
+		break;
+	default:
+		SPDK_ERRLOG("QP#%d: bad state requested: %u\n",
+			    rqpair->qpair.qid, new_state);
+		return -1;
+	}
+	rqpair->ibv_attr.cur_qp_state = rqpair->ibv_attr.qp_state;
+	rqpair->ibv_attr.qp_state = new_state;
+	rqpair->ibv_attr.ah_attr.port_num = rqpair->ibv_attr.port_num;
+
+	rc = ibv_modify_qp(rqpair->cm_id->qp, &rqpair->ibv_attr,
+			   attr_mask_rc[new_state]);
+
+	if (rc) {
+		SPDK_ERRLOG("QP#%d: failed to set state to: %s, %d (%s)\n",
+			    rqpair->qpair.qid, str_ibv_qp_state[new_state], errno, strerror(errno));
+		return rc;
+	}
+	rc = spdk_nvmf_rdma_update_ibv_qp(rqpair);
+
+	if (rc) {
+		SPDK_ERRLOG("QP#%d: failed to update attributes\n", rqpair->qpair.qid);
+		return rc;
+	}
+	state = spdk_nvmf_rdma_get_ibv_state(rqpair);
+
+	if (state != new_state) {
+		SPDK_ERRLOG("QP#%d: expected state: %s, actual state: %s\n",
+			    rqpair->qpair.qid, str_ibv_qp_state[new_state],
+			    str_ibv_qp_state[state]);
+		return -1;
+	}
+	SPDK_NOTICELOG("IBV QP#%u changed to: %s\n", rqpair->qpair.qid,
+		       str_ibv_qp_state[state]);
+	return 0;
+}
+
 static void spdk_nvmf_rdma_request_set_state(struct spdk_nvmf_rdma_request *rdma_req,
 		enum spdk_nvmf_rdma_request_state state)
 {
@@ -401,23 +526,24 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
 	struct spdk_nvmf_rdma_transport *rtransport;
 	struct spdk_nvmf_rdma_qpair *rqpair;
 	int rc, i;
-	struct ibv_qp_init_attr attr;
 	struct spdk_nvmf_rdma_recv *rdma_recv;
 	struct spdk_nvmf_rdma_request *rdma_req;
 
 	rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
 	rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
 
-	memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
-	attr.qp_type = IBV_QPT_RC;
-	attr.send_cq = rqpair->poller->cq;
-	attr.recv_cq = rqpair->poller->cq;
-	attr.cap.max_send_wr = rqpair->max_queue_depth * 2; /* SEND, READ, and WRITE operations */
-	attr.cap.max_recv_wr = rqpair->max_queue_depth; /* RECV operations */
-	attr.cap.max_send_sge = SPDK_NVMF_MAX_SGL_ENTRIES;
-	attr.cap.max_recv_sge = NVMF_DEFAULT_RX_SGE;
+	memset(&rqpair->ibv_init_attr, 0, sizeof(struct ibv_qp_init_attr));
+	rqpair->ibv_init_attr.qp_context = rqpair;
+	rqpair->ibv_init_attr.qp_type = IBV_QPT_RC;
+	rqpair->ibv_init_attr.send_cq = rqpair->poller->cq;
+	rqpair->ibv_init_attr.recv_cq = rqpair->poller->cq;
+	rqpair->ibv_init_attr.cap.max_send_wr = rqpair->max_queue_depth *
+			2; /* SEND, READ, and WRITE operations */
+	rqpair->ibv_init_attr.cap.max_recv_wr = rqpair->max_queue_depth; /* RECV operations */
+	rqpair->ibv_init_attr.cap.max_send_sge = SPDK_NVMF_MAX_SGL_ENTRIES;
+	rqpair->ibv_init_attr.cap.max_recv_sge = NVMF_DEFAULT_RX_SGE;
 
-	rc = rdma_create_qp(rqpair->cm_id, NULL, &attr);
+	rc = rdma_create_qp(rqpair->cm_id, NULL, &rqpair->ibv_init_attr);
 	if (rc) {
 		SPDK_ERRLOG("rdma_create_qp failed: errno %d: %s\n", errno, spdk_strerror(errno));
 		rdma_destroy_id(rqpair->cm_id);
@@ -548,6 +674,7 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
 		TAILQ_INSERT_TAIL(&rqpair->state_queue[rdma_req->state], rdma_req, state_link);
 		rqpair->state_cntr[rdma_req->state]++;
 	}
+	spdk_nvmf_rdma_update_ibv_qp(rqpair);
 
 	return 0;
 }
@@ -1101,6 +1228,11 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
 
 			TAILQ_REMOVE(&rqpair->incoming_queue, rdma_recv, link);
 
+			if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ERROR ||
+			    rqpair->qpair.state == SPDK_NVMF_QPAIR_DEACTIVATING) {
+				spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
+				break;
+			}
 			/* The next state transition depends on the data transfer needs of this request. */
 			rdma_req->req.xfer = spdk_nvmf_rdma_request_get_xfer(rdma_req);
 
@@ -1726,10 +1858,247 @@ spdk_nvmf_rdma_qpair_is_idle(struct spdk_nvmf_qpair *qpair)
 	return false;
 }
 
+static void
+spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport,
+				     struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	struct spdk_nvmf_rdma_recv *rdma_recv, *recv_tmp;
+	struct spdk_nvmf_rdma_request *rdma_req, *req_tmp;
+
+	/* We process I/O in the data transfer pending queue at the highest priority. */
+	TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING],
+			   state_link, req_tmp) {
+		if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
+			break;
+		}
+	}
+
+	/* The second highest priority is I/O waiting on memory buffers. */
+	TAILQ_FOREACH_SAFE(rdma_req, &rqpair->ch->pending_data_buf_queue, link,
+			   req_tmp) {
+		if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
+			break;
+		}
+	}
+
+	/* Do not process newly received commands if qp is in ERROR state,
+	 * wait till the recovery is complete.
+	 */
+	if (rqpair->qpair.state == SPDK_NVMF_QPAIR_ERROR) {
+		return;
+	}
+
+	/* The lowest priority is processing newly received commands */
+	TAILQ_FOREACH_SAFE(rdma_recv, &rqpair->incoming_queue, link, recv_tmp) {
+		if (TAILQ_EMPTY(&rqpair->state_queue[RDMA_REQUEST_STATE_FREE])) {
+			break;
+		}
+
+		rdma_req = TAILQ_FIRST(&rqpair->state_queue[RDMA_REQUEST_STATE_FREE]);
+		rdma_req->recv = rdma_recv;
+		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_NEW);
+		if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
+			break;
+		}
+	}
+}
+
+/* The recovery completion event handler to be executed in the rqpair
+ * poll group thread. It kicks off processing of the requests that are
+ * waiting for the rqpair is back online.
+ */
+static void
+_spdk_nvmf_rdma_qpair_process_pending(void *arg)
+{
+	struct spdk_nvmf_rdma_qpair *rqpair;
+	struct spdk_nvmf_rdma_transport *rtransport;
+
+	rqpair = arg;
+	rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
+				      struct spdk_nvmf_rdma_transport, transport);
+	spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair);
+}
+
+static int
+spdk_nvmf_rdma_recover(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	int recovered;
+	enum ibv_qp_state state, next_state;
+
+	state = spdk_nvmf_rdma_get_ibv_state(rqpair);
+	next_state = state;
+
+	SPDK_NOTICELOG("IBV QP#%u is in state: %s\n",
+		       rqpair->qpair.qid,
+		       str_ibv_qp_state[state]);
+
+	if (!(state == IBV_QPS_ERR || state == IBV_QPS_RESET)) {
+		SPDK_ERRLOG("Can't recover IBV qp#%u from the state: %s\n",
+			    rqpair->qpair.qid,
+			    str_ibv_qp_state[state]);
+		return -1;
+	}
+
+	rqpair->qpair.state = SPDK_NVMF_QPAIR_INACTIVE;
+	recovered = 0;
+
+	while (!recovered) {
+		state = spdk_nvmf_rdma_get_ibv_state(rqpair);
+		switch (state) {
+		case IBV_QPS_ERR:
+			next_state = IBV_QPS_RESET;
+			break;
+		case IBV_QPS_RESET:
+			next_state = IBV_QPS_INIT;
+			break;
+		case IBV_QPS_INIT:
+			next_state = IBV_QPS_RTR;
+			break;
+		case IBV_QPS_RTR:
+			next_state = IBV_QPS_RTS;
+			break;
+		case IBV_QPS_RTS:
+			recovered = 1;
+			break;
+		default:
+			SPDK_ERRLOG("IBV qp#%u unexpected state for recovery: %u\n",
+				    rqpair->qpair.qid, state);
+			goto error;
+		}
+		/* Do not transition into same state */
+		if (next_state == state) {
+			break;
+		}
+
+		if (spdk_nvmf_rdma_set_ibv_state(rqpair, next_state)) {
+			goto error;
+		}
+	}
+	rqpair->qpair.state = SPDK_NVMF_QPAIR_ACTIVE;
+	spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_qpair_process_pending, rqpair);
+
+	return 0;
+error:
+	SPDK_ERRLOG("IBV qp#%u recovery failed\n", rqpair->qpair.qid);
+	/* Put NVMf qpair back into error state so recovery
+	   will trigger disconnect */
+	rqpair->qpair.state = SPDK_NVMF_QPAIR_ERROR;
+	return -1;
+}
+
+static void
+spdk_nvmf_rdma_drain_state_queue(struct spdk_nvmf_rdma_qpair *rqpair,
+				 enum spdk_nvmf_rdma_request_state state)
+{
+	struct spdk_nvmf_rdma_request *rdma_req, *req_tmp;
+	struct spdk_nvmf_rdma_transport *rtransport;
+
+	TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[state], state_link, req_tmp) {
+		rtransport = SPDK_CONTAINEROF(rdma_req->req.qpair->transport,
+					      struct spdk_nvmf_rdma_transport, transport);
+		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
+		spdk_nvmf_rdma_request_process(rtransport, rdma_req);
+	}
+}
+
+static void spdk_nvmf_rdma_drain_rw_reqs(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_COMPLETING);
+}
+
+static void spdk_nvmf_rdma_drain_pending_reqs(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	struct spdk_nvmf_rdma_request *rdma_req, *req_tmp;
+
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING);
+	/* First wipe the requests waiting for buffer from the global list */
+	TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[RDMA_REQUEST_STATE_NEED_BUFFER], link, req_tmp) {
+		TAILQ_REMOVE(&rqpair->ch->pending_data_buf_queue, rdma_req, link);
+	}
+	/* Then drain the requests through the rdma queue */
+	spdk_nvmf_rdma_drain_state_queue(rqpair, RDMA_REQUEST_STATE_NEED_BUFFER);
+}
+
+static void
+spdk_nvmf_rdma_qp_drained(struct spdk_nvmf_rdma_qpair *rqpair)
+{
+	SPDK_NOTICELOG("IBV QP#%u drained\n", rqpair->qpair.qid);
+
+	if (spdk_nvmf_qpair_is_admin_queue(&rqpair->qpair)) {
+		spdk_nvmf_ctrlr_drain_aer_req(rqpair->qpair.ctrlr);
+	}
+
+	spdk_nvmf_rdma_drain_pending_reqs(rqpair);
+	spdk_nvmf_rdma_drain_rw_reqs(rqpair);
+
+	if (!spdk_nvmf_rdma_qpair_is_idle(&rqpair->qpair)) {
+		/* There must be outstanding requests down to media.
+		 * If so, wait till they're complete.
+		 */
+		assert(!TAILQ_EMPTY(&rqpair->qpair.outstanding));
+		SPDK_DEBUGLOG(SPDK_LOG_RDMA,
+			      "QP#%u (%p): wait for outstanding requests...\n",
+			      rqpair->qpair.qid, &rqpair->qpair);
+		return;
+	}
+
+	if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ERROR) {
+		/* Do not start recovery if qp is not in error state. */
+		return;
+	}
+
+	if (spdk_nvmf_rdma_recover(rqpair) != 0) {
+		SPDK_NOTICELOG("QP#%u (%p): recovery failed, disconnecting...\n",
+			       rqpair->qpair.qid, &rqpair->qpair);
+		spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
+	}
+}
+
+static void
+_spdk_nvmf_rdma_sq_drained(void *cb_arg)
+{
+	spdk_nvmf_rdma_qp_drained(cb_arg);
+}
+
+static void
+_spdk_nvmf_rdma_qp_last_wqe(void *cb_arg)
+{
+	struct spdk_nvmf_rdma_qpair *rqpair = cb_arg;
+
+	if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ERROR) {
+		SPDK_ERRLOG("QP#%u is not in ERROR state, dropping LAST_WQE event...\n",
+			    rqpair->qpair.qid);
+		return;
+	}
+	spdk_nvmf_rdma_qp_drained(rqpair);
+}
+
+static void
+_spdk_nvmf_rdma_qp_error(void *arg)
+{
+	struct spdk_nvmf_rdma_qpair *rqpair = arg;
+
+	rqpair->qpair.state = SPDK_NVMF_QPAIR_ERROR;
+
+	if (spdk_nvmf_rdma_qpair_is_idle(&rqpair->qpair)) {
+		/* There are no outstanding requests */
+		spdk_nvmf_rdma_qp_drained(rqpair);
+	}
+}
+
+static struct spdk_nvmf_rdma_qpair *
+spdk_nvmf_rqpair_from_qp(struct ibv_qp *qp)
+{
+	return qp->qp_context;
+}
+
 static void
 spdk_nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device)
 {
 	int rc;
+	struct spdk_nvmf_rdma_qpair *rqpair;
 	struct ibv_async_event event;
 
 	rc = ibv_get_async_event(device->context, &event);
@@ -1742,6 +2111,42 @@ spdk_nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device)
 
 	SPDK_NOTICELOG("Async event: %s\n",
 		       ibv_event_type_str(event.event_type));
+
+	switch (event.event_type) {
+	case IBV_EVENT_QP_FATAL:
+		rqpair = spdk_nvmf_rqpair_from_qp(event.element.qp);
+		spdk_nvmf_rdma_update_ibv_qp(rqpair);
+		spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_qp_error, rqpair);
+		break;
+	case IBV_EVENT_SQ_DRAINED:
+		rqpair = spdk_nvmf_rqpair_from_qp(event.element.qp);
+		spdk_nvmf_rdma_update_ibv_qp(rqpair);
+		spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_sq_drained, rqpair);
+		break;
+	case IBV_EVENT_QP_LAST_WQE_REACHED:
+		rqpair = spdk_nvmf_rqpair_from_qp(event.element.qp);
+		spdk_nvmf_rdma_update_ibv_qp(rqpair);
+		spdk_thread_send_msg(rqpair->qpair.group->thread, _spdk_nvmf_rdma_qp_last_wqe, rqpair);
+		break;
+	case IBV_EVENT_CQ_ERR:
+	case IBV_EVENT_QP_REQ_ERR:
+	case IBV_EVENT_QP_ACCESS_ERR:
+	case IBV_EVENT_COMM_EST:
+	case IBV_EVENT_PATH_MIG:
+	case IBV_EVENT_PATH_MIG_ERR:
+	case IBV_EVENT_DEVICE_FATAL:
+	case IBV_EVENT_PORT_ACTIVE:
+	case IBV_EVENT_PORT_ERR:
+	case IBV_EVENT_LID_CHANGE:
+	case IBV_EVENT_PKEY_CHANGE:
+	case IBV_EVENT_SM_CHANGE:
+	case IBV_EVENT_SRQ_ERR:
+	case IBV_EVENT_SRQ_LIMIT_REACHED:
+	case IBV_EVENT_CLIENT_REREGISTER:
+	case IBV_EVENT_GID_CHANGE:
+	default:
+		break;
+	}
 	ibv_ack_async_event(&event);
 }
 
@@ -1990,11 +2395,35 @@ spdk_nvmf_rdma_request_complete(struct spdk_nvmf_request *req)
 {
 	struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(req->qpair->transport,
 			struct spdk_nvmf_rdma_transport, transport);
-	struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req, struct spdk_nvmf_rdma_request, req);
+	struct spdk_nvmf_rdma_request *rdma_req = SPDK_CONTAINEROF(req,
+			struct spdk_nvmf_rdma_request, req);
+	struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair,
+			struct spdk_nvmf_rdma_qpair, qpair);
 
-	spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_EXECUTED);
-	spdk_nvmf_rdma_request_process(rtransport, rdma_req);
-
+	switch (rqpair->qpair.state) {
+	case SPDK_NVMF_QPAIR_ERROR:
+		/* Mark request as COMPLETED for ERROR state
+		 * so RDMA transfer is not kicked off
+		 */
+		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_COMPLETED);
+		spdk_nvmf_rdma_request_process(rtransport, rdma_req);
+
+		/* QP in ERROR state is awaiting for all requests to be
+		 * completed by bdev layer
+		 */
+		if (spdk_nvmf_rdma_qpair_is_idle(&rqpair->qpair)) {
+			spdk_nvmf_rdma_qp_drained(rqpair);
+		}
+		break;
+	case SPDK_NVMF_QPAIR_INACTIVE:
+	case SPDK_NVMF_QPAIR_ACTIVATING:
+	case SPDK_NVMF_QPAIR_ACTIVE:
+	case SPDK_NVMF_QPAIR_DEACTIVATING:
+	default:
+		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_EXECUTED);
+		spdk_nvmf_rdma_request_process(rtransport, rdma_req);
+		break;
+	}
 	return 0;
 }
 
@@ -2004,44 +2433,6 @@ spdk_nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair)
 	spdk_nvmf_rdma_qpair_destroy(SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair));
 }
 
-static void
-spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport,
-				     struct spdk_nvmf_rdma_qpair *rqpair)
-{
-	struct spdk_nvmf_rdma_recv *rdma_recv, *recv_tmp;
-	struct spdk_nvmf_rdma_request *rdma_req, *req_tmp;
-
-	/* We process I/O in the data transfer pending queue at the highest priority. */
-	TAILQ_FOREACH_SAFE(rdma_req, &rqpair->state_queue[RDMA_REQUEST_STATE_DATA_TRANSFER_PENDING],
-			   state_link, req_tmp) {
-		if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
-			break;
-		}
-	}
-
-	/* The second highest priority is I/O waiting on memory buffers. */
-	TAILQ_FOREACH_SAFE(rdma_req, &rqpair->ch->pending_data_buf_queue, link,
-			   req_tmp) {
-		if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
-			break;
-		}
-	}
-
-	/* The lowest priority is processing newly received commands */
-	TAILQ_FOREACH_SAFE(rdma_recv, &rqpair->incoming_queue, link, recv_tmp) {
-		if (TAILQ_EMPTY(&rqpair->state_queue[RDMA_REQUEST_STATE_FREE])) {
-			break;
-		}
-
-		rdma_req = TAILQ_FIRST(&rqpair->state_queue[RDMA_REQUEST_STATE_FREE]);
-		rdma_req->recv = rdma_recv;
-		spdk_nvmf_rdma_request_set_state(rdma_req, RDMA_REQUEST_STATE_NEW);
-		if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
-			break;
-		}
-	}
-}
-
 static struct spdk_nvmf_rdma_request *
 get_rdma_req_from_wc(struct ibv_wc *wc)
 {
static struct spdk_nvmf_rdma_request *
|
static struct spdk_nvmf_rdma_request *
|
||||||
get_rdma_req_from_wc(struct ibv_wc *wc)
|
get_rdma_req_from_wc(struct ibv_wc *wc)
|
||||||
{
|
{
|
||||||
@ -2112,8 +2503,8 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
|
|||||||
|
|
||||||
for (i = 0; i < reaped; i++) {
|
for (i = 0; i < reaped; i++) {
|
||||||
if (wc[i].status) {
|
if (wc[i].status) {
|
||||||
SPDK_ERRLOG("CQ error on CQ %p, Request 0x%lu (%d): %s\n",
|
SPDK_DEBUGLOG(SPDK_LOG_RDMA, "CQ error on CQ %p, Request 0x%lu (%d): %s\n",
|
||||||
rpoller->cq, wc[i].wr_id, wc[i].status, ibv_wc_status_str(wc[i].status));
|
rpoller->cq, wc[i].wr_id, wc[i].status, ibv_wc_status_str(wc[i].status));
|
||||||
error = true;
|
error = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -2158,7 +2549,6 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
 			rqpair = rdma_recv->qpair;
 
 			TAILQ_INSERT_TAIL(&rqpair->incoming_queue, rdma_recv, link);
-
 			/* Try to process other queued requests */
 			spdk_nvmf_rdma_qpair_process_pending(rtransport, rqpair);
 			break;
@@ -71,6 +71,7 @@ spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
 		assert(qpair->state_cb != NULL);
 
 		if (TAILQ_EMPTY(&qpair->outstanding)) {
+
 			qpair->state_cb(qpair->state_cb_arg, 0);
 		}
 	} else {
@@ -135,6 +136,8 @@ spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
 	if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
 		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
 		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
+		/* Place the request on the outstanding list so we can keep track of it */
+		TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
 		spdk_nvmf_request_complete(req);
 		return;
 	}