rdma: Add synchronization for LAST_WQE_REACHED event

The following scenario might occur when nvmf_tgt is stopped:
1. nvmf_tgt receives SIGINT and changes state to NVMF_TGT_FINI_STOP_SUBSYSTEMS.
2. In this state nvmf_tgt stops all subsystems and disconnects the associated qpairs.
3. In the case of an RDMA qpair, its state is changed to IBV_QPS_ERR.
Once the qpair changes its state to IBV_QPS_ERR, the RDMA device generates a
LAST_WQE_REACHED event when there are no more WQEs that can be consumed
from the SRQ by this qpair.
4. When all subsystems are stopped, some qpairs may still be alive since they
haven't received the LAST_WQE_REACHED event yet.
5. nvmf_tgt stops all poll groups and forcefully destroys any qpairs linked to them.
6. At this moment a LAST_WQE_REACHED event might be generated and received on another thread.
The handler of this event sends a message with a pointer to the qpair, but the qpair itself
may already be destroyed.
7. The thread that owned the qpair receives the LAST_WQE_REACHED message with a pointer to
the already destroyed qpair and destroys it a second time, when all pointers are invalid.

ibv events related to a qpair should be handled by the thread that
owns this qpair. This commit adds a new structure that describes an
ibv event, helper functions for sending the event, and a list of
events per RDMA qpair; it also adds synchronization for the
LAST_WQE_REACHED event.
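
In outline, the fix works like the following minimal sketch. This is a
compile-only illustration, not the actual SPDK code: the names are shortened
stand-ins for the spdk_nvmf_rdma_* identifiers in the diff below, and a plain
while-loop stands in for the STAILQ_FOREACH_SAFE walk used there.

    #include <stdlib.h>
    #include <sys/queue.h>

    struct qpair;
    typedef void (*ibv_event_cb)(struct qpair *q);

    /* One in-flight ibv event; owned by the message, tracked by the qpair. */
    struct ibv_event_ctx {
        struct qpair *q;            /* set to NULL if the qpair is destroyed first */
        ibv_event_cb cb_fn;
        STAILQ_ENTRY(ibv_event_ctx) link;
    };

    struct qpair {
        STAILQ_HEAD(, ibv_event_ctx) ibv_events;
    };

    /* Runs on the qpair's owning thread when the message is delivered. */
    static void process_ibv_event(void *arg)
    {
        struct ibv_event_ctx *ctx = arg;

        if (ctx->q != NULL) {
            /* The qpair is still alive: unlink the event and run the callback. */
            STAILQ_REMOVE(&ctx->q->ibv_events, ctx, ibv_event_ctx, link);
            if (ctx->cb_fn) {
                ctx->cb_fn(ctx->q);
            }
        }
        /* If ctx->q is NULL, the qpair died before delivery: drop the event. */
        free(ctx);
    }

    /* Runs during qpair destruction: detach every event still in flight. */
    static void clean_ibv_events(struct qpair *q)
    {
        struct ibv_event_ctx *ctx;

        while (!STAILQ_EMPTY(&q->ibv_events)) {
            ctx = STAILQ_FIRST(&q->ibv_events);
            ctx->q = NULL;  /* ctx itself is freed later in process_ibv_event() */
            STAILQ_REMOVE_HEAD(&q->ibv_events, link);
        }
    }

The key property is that the event context, not the qpair, owns the message's
lifetime: the qpair only holds links that destruction can sever, so a message
delivered after the qpair is gone simply frees its context and returns.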

Fixes #1075

Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Signed-off-by: Sasha Kotchubievsky <sashakot@mellanox.com>
Signed-off-by: Evgeniy Kochetov <evgeniik@mellanox.com>
Change-Id: I22bff89741708df2518760934ecb4e33fad49473
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/476712
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom SPDK FC-NVMe CI <spdk-ci.pdl@broadcom.com>
Community-CI: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Seth Howell <seth.howell@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>

@@ -330,6 +330,15 @@ struct spdk_nvmf_rdma_resources {
STAILQ_HEAD(, spdk_nvmf_rdma_request) free_queue;
};
typedef void (*spdk_nvmf_rdma_qpair_ibv_event)(struct spdk_nvmf_rdma_qpair *rqpair);
struct spdk_nvmf_rdma_ibv_event_ctx {
struct spdk_nvmf_rdma_qpair *rqpair;
spdk_nvmf_rdma_qpair_ibv_event cb_fn;
/* Link to other ibv events associated with this qpair */
STAILQ_ENTRY(spdk_nvmf_rdma_ibv_event_ctx) link;
};
struct spdk_nvmf_rdma_qpair {
struct spdk_nvmf_qpair qpair;
@@ -399,6 +408,9 @@ struct spdk_nvmf_rdma_qpair {
struct spdk_poller *destruct_poller;
/* List of ibv async events */
STAILQ_HEAD(, spdk_nvmf_rdma_ibv_event_ctx) ibv_events;
/* There are several ways a disconnect can start on a qpair
* and they are not all mutually exclusive. It is important
* that we only initialize one of these paths.
@@ -898,6 +910,17 @@ cleanup:
return NULL;
}
static void
spdk_nvmf_rdma_qpair_clean_ibv_events(struct spdk_nvmf_rdma_qpair *rqpair)
{
struct spdk_nvmf_rdma_ibv_event_ctx *ctx, *tctx;
STAILQ_FOREACH_SAFE(ctx, &rqpair->ibv_events, link, tctx) {
ctx->rqpair = NULL;
/* Memory allocated for ctx is freed in spdk_nvmf_rdma_qpair_process_ibv_event */
STAILQ_REMOVE(&rqpair->ibv_events, ctx, spdk_nvmf_rdma_ibv_event_ctx, link);
}
}
static void
spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
{
@@ -948,6 +971,8 @@ spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
nvmf_rdma_resources_destroy(rqpair->resources);
}
spdk_nvmf_rdma_qpair_clean_ibv_events(rqpair);
free(rqpair);
}
@@ -1331,6 +1356,7 @@ nvmf_rdma_connect(struct spdk_nvmf_transport *transport, struct rdma_cm_event *e
rqpair->cm_id = event->id;
rqpair->listen_id = event->listen_id;
rqpair->qpair.transport = transport;
STAILQ_INIT(&rqpair->ibv_events);
/* use qid from the private data to determine the qpair type
qid will be set to the appropriate value when the controller is created */
rqpair->qpair.qid = private_data->qid;
@@ -2932,15 +2958,6 @@ static const char *CM_EVENT_STR[] = {
};
#endif /* DEBUG */
static void
nvmf_rdma_handle_last_wqe_reached(void *ctx)
{
struct spdk_nvmf_rdma_qpair *rqpair = ctx;
rqpair->last_wqe_reached = true;
nvmf_rdma_destroy_drained_qpair(rqpair);
}
static void
spdk_nvmf_process_cm_event(struct spdk_nvmf_transport *transport, new_qpair_fn cb_fn, void *cb_arg)
{
@@ -3021,6 +3038,50 @@ spdk_nvmf_process_cm_event(struct spdk_nvmf_transport *transport, new_qpair_fn c
}
}
static void
nvmf_rdma_handle_last_wqe_reached(struct spdk_nvmf_rdma_qpair *rqpair)
{
rqpair->last_wqe_reached = true;
nvmf_rdma_destroy_drained_qpair(rqpair);
}
static void
spdk_nvmf_rdma_qpair_process_ibv_event(void *ctx)
{
struct spdk_nvmf_rdma_ibv_event_ctx *event_ctx = ctx;
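/* rqpair is set to NULL by spdk_nvmf_rdma_qpair_clean_ibv_events() if the
 * qpair was destroyed before this message was processed. */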
if (event_ctx->rqpair) {
STAILQ_REMOVE(&event_ctx->rqpair->ibv_events, event_ctx, spdk_nvmf_rdma_ibv_event_ctx, link);
if (event_ctx->cb_fn) {
event_ctx->cb_fn(event_ctx->rqpair);
}
}
free(event_ctx);
}
static int
spdk_nvmf_rdma_send_qpair_async_event(struct spdk_nvmf_rdma_qpair *rqpair,
spdk_nvmf_rdma_qpair_ibv_event fn)
{
struct spdk_nvmf_rdma_ibv_event_ctx *ctx;
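/* Without a poll group there is no owning thread to deliver the event to. */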
if (!rqpair->qpair.group) {
return EINVAL;
}
ctx = calloc(1, sizeof(*ctx));
if (!ctx) {
return ENOMEM;
}
ctx->rqpair = rqpair;
ctx->cb_fn = fn;
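/* Link the event to the qpair so that qpair destruction can detach it. */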
STAILQ_INSERT_TAIL(&rqpair->ibv_events, ctx, link);
return spdk_thread_send_msg(rqpair->qpair.group->thread, spdk_nvmf_rdma_qpair_process_ibv_event,
ctx);
}
static void
spdk_nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device)
{
@@ -3050,14 +3111,10 @@ spdk_nvmf_process_ib_event(struct spdk_nvmf_rdma_device *device)
/* This event only occurs for shared receive queues. */
rqpair = event.element.qp->qp_context;
SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Last WQE reached event received for rqpair %p\n", rqpair);
/* This must be handled on the polling thread if it exists. Otherwise the timeout will catch it. */
if (rqpair->qpair.group) {
spdk_thread_send_msg(rqpair->qpair.group->thread, nvmf_rdma_handle_last_wqe_reached, rqpair);
} else {
SPDK_ERRLOG("Unable to destroy the qpair %p since it does not have a poll group.\n", rqpair);
if (spdk_nvmf_rdma_send_qpair_async_event(rqpair, nvmf_rdma_handle_last_wqe_reached)) {
SPDK_ERRLOG("Failed to send LAST_WQE_REACHED event for rqpair %p\n", rqpair);
rqpair->last_wqe_reached = true;
}
break;
case IBV_EVENT_SQ_DRAINED:
/* This event occurs frequently in both error and non-error states.