rdma: Do not use the poller to handle the qpair exiting.
Generally, this patch does the following work:

Remove the destruct poller. We do not need it: the destruct poller existed specifically for the Software RoCE (Soft-RoCE) case. Since Soft-RoCE does not generate the IBV_EVENT_QP_LAST_WQE_REACHED event, we no longer wait on the last_wqe_reached flag for such devices when an SRQ is enabled, so the poller can be removed.

The purpose of this patch is to fix a coredump that occurs when running a local RDMA test, e.g.:

    test/nvmf/host/bdevperf.sh --transport=rdma

The coredump is caused by the qpair being freed twice. For the RDMA transport, the qpair is not really removed from the poll group when the upper layer removes it. The first free is triggered by nvmf_rdma_destroy_drained_qpair in nvmf_rdma_poller_poll, and the second by nvmf_rdma_qpair_reject_connection in nvmf_rdma_close_qpair. Since nvmf_rdma_close_qpair is always called, we must guarantee that the qpair is only torn down after that call; otherwise the qpair is double freed.

The approach here is to add a "to_close" flag to the rqpair structure and to free the rqpair only after "to_close" has been set by nvmf_rdma_close_qpair. (A minimal generic sketch of this guard follows the commit metadata below.)

Signed-off-by: Ziye Yang <ziye.yang@intel.com>
Change-Id: I6f97debbcd29bbb7c6e3f9725907b4102a1d2892
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3661
Community-CI: Broadcom CI
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Seth Howell <seth.howell@intel.com>
parent ea80818901
commit 85ff3fcea6
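For orientation before the diff: a minimal, self-contained sketch of the guard this commit introduces (generic names in plain C, not the SPDK API). Teardown is deferred until both the transport has drained the qpair and the nvmf layer has closed it, so the destroy body runs exactly once no matter which event arrives last:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Generic stand-in for the rqpair: teardown needs both conditions. */
    struct qpair {
        bool to_close;  /* the upper layer called close (always happens) */
        bool drained;   /* all outstanding RDMA work has completed */
        bool destroyed;
    };

    /* Called from both paths; tears the qpair down exactly once. */
    static void
    maybe_destroy(struct qpair *q)
    {
        if (!q->to_close || !q->drained) {
            return; /* the other event has not arrived yet */
        }
        assert(!q->destroyed); /* the guard rules out a second free */
        q->destroyed = true;
        printf("qpair destroyed exactly once\n");
    }

    /* Poller path: the last completion or last_wqe event was observed. */
    static void
    on_drained(struct qpair *q)
    {
        q->drained = true;
        maybe_destroy(q);
    }

    /* nvmf-layer path: close is always called, possibly after draining. */
    static void
    close_qpair(struct qpair *q)
    {
        q->to_close = true;
        maybe_destroy(q);
    }

    int
    main(void)
    {
        struct qpair q = {false, false, false};

        on_drained(&q);  /* drain finishes first: nothing freed yet */
        close_qpair(&q); /* close arrives: freed here, exactly once */
        return 0;
    }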
@@ -61,9 +61,6 @@ const struct spdk_nvmf_transport_ops spdk_nvmf_transport_rdma;
 #define DEFAULT_NVMF_RDMA_CQ_SIZE	4096
 #define MAX_WR_PER_QP(queue_depth)	(queue_depth * 3 + 2)
 
-/* Timeout for destroying defunct rqpairs */
-#define NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US	4000000
-
 static int g_spdk_nvmf_ibv_query_mask =
 	IBV_QP_STATE |
 	IBV_QP_PKEY_INDEX |
@@ -384,12 +381,6 @@ struct spdk_nvmf_rdma_qpair {
 	 */
 	enum ibv_qp_state		ibv_state;
 
-	/* Poller registered in case the qpair doesn't properly
-	 * complete the qpair destruct process and becomes defunct.
-	 */
-
-	struct spdk_poller		*destruct_poller;
-
 	/*
 	 * io_channel which is used to destroy qpair when it is removed from poll group
 	 */
@@ -400,6 +391,9 @@ struct spdk_nvmf_rdma_qpair {
 
 	/* Lets us know that we have received the last_wqe event. */
 	bool					last_wqe_reached;
+
+	/* Indicates that nvmf_rdma_close_qpair has been called. */
+	bool					to_close;
 };
 
 struct spdk_nvmf_rdma_poller_stat {
@@ -829,8 +823,6 @@ nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
 
 	spdk_trace_record(TRACE_RDMA_QP_DESTROY, 0, 0, (uintptr_t)rqpair->cm_id, 0);
 
-	spdk_poller_unregister(&rqpair->destruct_poller);
-
 	if (rqpair->qd != 0) {
 		struct spdk_nvmf_qpair *qpair = &rqpair->qpair;
 		struct spdk_nvmf_rdma_transport	*rtransport = SPDK_CONTAINEROF(qpair->transport,
@@ -2746,12 +2738,19 @@ nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport,
 	}
 }
 
-static void nvmf_rdma_destroy_drained_qpair(void *ctx)
+static void
+nvmf_rdma_destroy_drained_qpair(struct spdk_nvmf_rdma_qpair *rqpair)
 {
-	struct spdk_nvmf_rdma_qpair *rqpair = ctx;
 	struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
 			struct spdk_nvmf_rdma_transport, transport);
 
+	nvmf_rdma_qpair_process_pending(rtransport, rqpair, true);
+
+	/* nvmf_rdma_close_qpair has not been called yet. */
+	if (!rqpair->to_close) {
+		return;
+	}
+
 	/* In non SRQ path, we will reach rqpair->max_queue_depth. In SRQ path, we will get the last_wqe event. */
 	if (rqpair->current_send_depth != 0) {
 		return;
@@ -2761,21 +2760,19 @@ static void nvmf_rdma_destroy_drained_qpair(void *ctx)
 		return;
 	}
 
-	if (rqpair->srq != NULL && rqpair->last_wqe_reached == false) {
+	/* Judge whether the device is emulated by Software RoCE,
+	 * which will not send the last_wqe event.
+	 */
+	if (rqpair->srq != NULL && rqpair->device->attr.vendor_id != 0 &&
+	    rqpair->last_wqe_reached == false) {
 		return;
 	}
 
-	nvmf_rdma_qpair_process_pending(rtransport, rqpair, true);
-
-	/* Qpair will be destroyed after nvmf layer closes this qpair */
-	if (rqpair->qpair.state != SPDK_NVMF_QPAIR_ERROR) {
-		return;
-	}
+	assert(rqpair->qpair.state == SPDK_NVMF_QPAIR_ERROR);
 
 	nvmf_rdma_qpair_destroy(rqpair);
 }
 
 static int
 nvmf_rdma_disconnect(struct rdma_cm_event *evt)
 {
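The vendor_id test added above is how the code recognizes Soft-RoCE: per the commit message, an emulated (rxe) device reports vendor_id == 0 in its device attributes and never generates IBV_EVENT_QP_LAST_WQE_REACHED, so the last_wqe_reached wait is skipped for it. A standalone sketch of the same check against plain libibverbs (hypothetical helper name; assumes an already-opened ibv_context):

    #include <stdbool.h>
    #include <infiniband/verbs.h>

    /* Mirrors the rqpair->device->attr.vendor_id != 0 test above:
     * Soft-RoCE (rxe) devices report vendor_id == 0. */
    static bool
    device_is_soft_roce(struct ibv_context *verbs)
    {
        struct ibv_device_attr attr;

        if (ibv_query_device(verbs, &attr) != 0) {
            return false; /* query failed; assume real hardware */
        }

        return attr.vendor_id == 0;
    }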
@@ -3540,27 +3537,13 @@ nvmf_rdma_request_complete(struct spdk_nvmf_request *req)
 	return 0;
 }
 
-static int
-nvmf_rdma_destroy_defunct_qpair(void *ctx)
-{
-	struct spdk_nvmf_rdma_qpair	*rqpair = ctx;
-	struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport,
-			struct spdk_nvmf_rdma_transport, transport);
-
-	SPDK_INFOLOG(SPDK_LOG_RDMA, "QP#%d hasn't been drained as expected, manually destroy it\n",
-		     rqpair->qpair.qid);
-
-	nvmf_rdma_qpair_process_pending(rtransport, rqpair, true);
-	nvmf_rdma_qpair_destroy(rqpair);
-
-	return SPDK_POLLER_BUSY;
-}
-
 static void
 nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair)
 {
 	struct spdk_nvmf_rdma_qpair *rqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_rdma_qpair, qpair);
 
+	rqpair->to_close = true;
+
 	/* This happens only when the qpair is disconnected before
 	 * it is added to the poll group. Since there is no poll group,
 	 * the RDMA qp has not been initialized yet and the RDMA CM
@@ -3575,8 +3558,7 @@ nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair)
 		spdk_rdma_qp_disconnect(rqpair->rdma_qp);
 	}
 
-	rqpair->destruct_poller = SPDK_POLLER_REGISTER(nvmf_rdma_destroy_defunct_qpair, (void *)rqpair,
-					NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US);
+	nvmf_rdma_destroy_drained_qpair(rqpair);
 }
 
 static struct spdk_nvmf_rdma_qpair *
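This last hunk is the other half of the fix: instead of arming a timeout poller, nvmf_rdma_close_qpair now attempts the guarded teardown immediately. If the qpair has not drained yet, the call is a no-op and the completion poller finishes the job later. In terms of the generic sketch near the top, the close-first ordering plays out as follows (alternative main() body for that sketch):

    struct qpair q = {false, false, false};

    close_qpair(&q); /* close first: teardown deferred, not timed out */
    assert(!q.destroyed);

    on_drained(&q);  /* drain completes later in the poller path */
    assert(q.destroyed); /* freed exactly once, no defunct-qpair timer */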