From 6b86039fd9fee1ae2b0b15c5601c0f9a506a26cb Mon Sep 17 00:00:00 2001
From: Ben Walker
Date: Fri, 5 Mar 2021 14:00:12 -0700
Subject: [PATCH] nvme/tcp: Ensure qpair is polled when it gets a writev_async
 completion

There was a fix for this that went into the posix layer, but the
underlying problem is the logic in the nvme/tcp transport. Attempt to
fix that instead.

Change-Id: I04dd850bb201641d441c8c1f88c7bb8ba1d09e58
Signed-off-by: Ben Walker
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6751
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins
Reviewed-by: Jim Harris
Reviewed-by: Aleksey Marchuk
---
 lib/nvme/nvme_tcp.c       | 44 ++++++++++++++++++++++++++++++++++++++++
 module/sock/posix/posix.c |  8 --------
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/lib/nvme/nvme_tcp.c b/lib/nvme/nvme_tcp.c
index 0d56e5e21..17c9ca88b 100644
--- a/lib/nvme/nvme_tcp.c
+++ b/lib/nvme/nvme_tcp.c
@@ -68,6 +68,8 @@ struct nvme_tcp_poll_group {
 	struct spdk_sock_group *sock_group;
 	uint32_t completions_per_qpair;
 	int64_t num_completions;
+
+	TAILQ_HEAD(, nvme_tcp_qpair) needs_poll;
 };
 
 /* NVMe TCP qpair extensions for spdk_nvme_qpair */
@@ -105,6 +107,9 @@ struct nvme_tcp_qpair {
 	uint8_t cpda;
 
 	enum nvme_tcp_qpair_state state;
+
+	TAILQ_ENTRY(nvme_tcp_qpair) link;
+	bool needs_poll;
 };
 
 enum nvme_tcp_req_state {
@@ -301,6 +306,13 @@ nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_
 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
 	struct nvme_tcp_pdu *pdu;
 	int rc;
+	struct nvme_tcp_poll_group *group;
+
+	if (tqpair->needs_poll) {
+		group = nvme_tcp_poll_group(qpair->poll_group);
+		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
+		tqpair->needs_poll = false;
+	}
 
 	rc = spdk_sock_close(&tqpair->sock);
 
@@ -365,6 +377,18 @@ _pdu_write_done(void *cb_arg, int err)
 {
 	struct nvme_tcp_pdu *pdu = cb_arg;
 	struct nvme_tcp_qpair *tqpair = pdu->qpair;
+	struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
+
+	/* If there are queued requests, we assume they are queued because they are waiting
+	 * for resources to be released. Those resources are almost certainly released in
+	 * response to a PDU completing here. However, to attempt to make forward progress
+	 * the qpair needs to be polled and we can't rely on another network event to make
+	 * that happen. Add it to a list of qpairs to poll regardless of network activity
+	 * here. */
+	if (pgroup && !STAILQ_EMPTY(&tqpair->qpair.queued_req) && !tqpair->needs_poll) {
+		TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link);
+		tqpair->needs_poll = true;
+	}
 
 	TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
 
@@ -1690,6 +1714,12 @@ nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_soc
 	struct spdk_nvme_qpair *qpair = ctx;
 	struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
 	int32_t num_completions;
+	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
+
+	if (tqpair->needs_poll) {
+		TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link);
+		tqpair->needs_poll = false;
+	}
 
 	num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);
 
@@ -2046,6 +2076,8 @@ nvme_tcp_poll_group_create(void)
 		return NULL;
 	}
 
+	TAILQ_INIT(&group->needs_poll);
+
 	group->sock_group = spdk_sock_group_create(group);
 	if (group->sock_group == NULL) {
 		free(group);
@@ -2089,6 +2121,11 @@ nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
 	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
 
+	if (tqpair->needs_poll) {
+		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
+		tqpair->needs_poll = false;
+	}
+
 	if (tqpair->sock && group->sock_group) {
 		if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
 			return -EPROTO;
@@ -2131,6 +2168,7 @@ nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *t
 {
 	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
 	struct spdk_nvme_qpair *qpair, *tmp_qpair;
+	struct nvme_tcp_qpair *tqpair, *tmp_tqpair;
 
 	group->completions_per_qpair = completions_per_qpair;
 	group->num_completions = 0;
@@ -2141,6 +2179,12 @@ nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *t
 		disconnected_qpair_cb(qpair, tgroup->group->ctx);
 	}
 
+	/* If any qpairs were marked as needing to be polled due to an asynchronous write completion
+	 * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. */
+	TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) {
+		nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock);
+	}
+
 	return group->num_completions;
 }
 
diff --git a/module/sock/posix/posix.c b/module/sock/posix/posix.c
index 2ef28f0c4..a5612ab5c 100644
--- a/module/sock/posix/posix.c
+++ b/module/sock/posix/posix.c
@@ -663,7 +663,6 @@ static int
 _sock_check_zcopy(struct spdk_sock *sock)
 {
 	struct spdk_posix_sock *psock = __posix_sock(sock);
-	struct spdk_posix_sock_group_impl *group = __posix_group_impl(sock->group_impl);
 	struct msghdr msgh = {};
 	uint8_t buf[sizeof(struct cmsghdr) + sizeof(struct sock_extended_err)];
 	ssize_t rc;
@@ -726,13 +725,6 @@ _sock_check_zcopy(struct spdk_sock *sock)
 			break;
 		}
 	}
-
-	/* If we reaped buffer reclaim notification and sock is not in pending_events list yet,
-	 * add it now. It allows to call socket callback and process completions */
-	if (found && !psock->pending_events && group) {
-		psock->pending_events = true;
-		TAILQ_INSERT_TAIL(&group->pending_events, psock, link);
-	}
 }
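
The forward-progress pattern this patch introduces is easier to see in isolation. The sketch below is a minimal, self-contained rendering of it, not SPDK code: every name in it (poll_group, qpair, on_write_done, on_sock_event, process_completions) is an illustrative stand-in for the corresponding symbol in the diff (nvme_tcp_poll_group, nvme_tcp_qpair, _pdu_write_done, nvme_tcp_qpair_sock_cb, nvme_tcp_poll_group_process_completions).

/* The "needs_poll" pattern in miniature. NOT SPDK code; a sketch only. */
#include <stdbool.h>
#include <stdio.h>
#include <sys/queue.h>

struct qpair {
	int id;
	bool has_queued_reqs;	/* requests stalled waiting for resources */
	bool needs_poll;	/* true while on the group's needs_poll list */
	TAILQ_ENTRY(qpair) link;
};

struct poll_group {
	TAILQ_HEAD(, qpair) needs_poll;
};

/* Write-completion path (the patch's _pdu_write_done): a completed send has
 * likely freed the resources that queued requests are waiting on, but no
 * further socket event is guaranteed to arrive, so flag the qpair for an
 * unconditional poll. */
static void
on_write_done(struct poll_group *pg, struct qpair *qp)
{
	if (pg != NULL && qp->has_queued_reqs && !qp->needs_poll) {
		TAILQ_INSERT_TAIL(&pg->needs_poll, qp, link);
		qp->needs_poll = true;
	}
}

/* Socket-event path (the patch's nvme_tcp_qpair_sock_cb): the qpair is being
 * polled anyway, so drop the flag first to keep the list duplicate-free. */
static void
on_sock_event(struct poll_group *pg, struct qpair *qp)
{
	if (qp->needs_poll) {
		TAILQ_REMOVE(&pg->needs_poll, qp, link);
		qp->needs_poll = false;
	}
	printf("polled qpair %d\n", qp->id);
}

/* Group poll (the patch's nvme_tcp_poll_group_process_completions): after the
 * event-driven callbacks run, sweep anything still flagged. The patch iterates
 * with TAILQ_FOREACH_SAFE because the callback unlinks entries; since
 * on_sock_event always removes the head here, a drain loop is the portable
 * equivalent. */
static void
process_completions(struct poll_group *pg)
{
	/* (event-driven socket callbacks would run here) */
	while (!TAILQ_EMPTY(&pg->needs_poll)) {
		on_sock_event(pg, TAILQ_FIRST(&pg->needs_poll));
	}
}

int
main(void)
{
	struct poll_group pg;
	struct qpair qp = { .id = 7, .has_queued_reqs = true };

	TAILQ_INIT(&pg.needs_poll);
	on_write_done(&pg, &qp);	/* async write completes with requests queued */
	process_completions(&pg);	/* qpair 7 is polled despite no socket event */
	return 0;
}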
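
Two details of the design are worth noting. First, the needs_poll flag mirrors list membership, which makes the insert in the write-completion path idempotent and lets every removal site unlink in O(1); that is why the patch also clears the flag in nvme_tcp_ctrlr_disconnect_qpair and nvme_tcp_poll_group_disconnect_qpair, so a stale list entry can never outlive the qpair's membership in the group. Second, because the sweep runs on every call to nvme_tcp_poll_group_process_completions, a qpair flagged by a write completion is polled no later than the group's next poll iteration even if its socket never reports another event, which is what makes the posix-layer pending_events workaround removed in module/sock/posix/posix.c unnecessary: the transport no longer relies on the socket layer to synthesize an event for it.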