From bf602f12ca131a26cff588068869cdfd5cce422d Mon Sep 17 00:00:00 2001 From: Alexey Marchuk Date: Fri, 16 Oct 2020 16:37:48 +0300 Subject: [PATCH] nvmf/tcp: Support ICD for fabric/admin commands According to the SPEC we should support up to 8192 bytes of ICD for admin and fabric commands. Transport configuration parameter in_capsule_data_size is applied to all qpair types - admin and IO. Also we allocate resources when we get a connection request, so we don't know qpair type at this moment. Create a list of buffer in TCP poll group to support ICD up to 8192 bytes when configuration ICD is less than this value. The number of elements in this pool is hardcoded, it is planned to add a new configuration parameter later. Fixes issue #1569 Signed-off-by: Alexey Marchuk Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4754 (master) (cherry picked from commit 85fa43241bc236d4bcb91a261d565352f6ff3559) Change-Id: I8589e3e2ea95d515f5503c6de7c1ee40aaf7b6da Signed-off-by: Tomasz Zawadzki Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4886 Tested-by: SPDK CI Jenkins Reviewed-by: Aleksey Marchuk Reviewed-by: Shuhei Matsumoto --- include/spdk_internal/nvme_tcp.h | 2 +- lib/nvme/nvme_tcp.c | 3 +- lib/nvmf/tcp.c | 135 ++++++++++++++++++++++++++++-- test/unit/lib/nvmf/tcp.c/tcp_ut.c | 5 ++ 4 files changed, 134 insertions(+), 11 deletions(-) diff --git a/include/spdk_internal/nvme_tcp.h b/include/spdk_internal/nvme_tcp.h index 7065bc060..ce1b08edc 100644 --- a/include/spdk_internal/nvme_tcp.h +++ b/include/spdk_internal/nvme_tcp.h @@ -43,7 +43,7 @@ #define SPDK_NVME_TCP_DIGEST_ALIGNMENT 4 #define SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT 30 #define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 8 - +#define SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE 8192u /* * Maximum number of SGL elements. */ diff --git a/lib/nvme/nvme_tcp.c b/lib/nvme/nvme_tcp.c index 4ca684188..1480d0d51 100644 --- a/lib/nvme/nvme_tcp.c +++ b/lib/nvme/nvme_tcp.c @@ -57,7 +57,6 @@ #define NVME_TCP_HPDA_DEFAULT 0 #define NVME_TCP_MAX_R2T_DEFAULT 1 #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096 -#define NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE 8192 /* NVMe TCP transport extensions for spdk_nvme_ctrlr */ struct nvme_tcp_ctrlr { @@ -518,7 +517,7 @@ nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req, if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) { max_incapsule_data_size = ctrlr->ioccsz_bytes; if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) { - max_incapsule_data_size = NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE; + max_incapsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE; } if (req->payload_size <= max_incapsule_data_size) { diff --git a/lib/nvmf/tcp.c b/lib/nvmf/tcp.c index 631fc6794..96d491bf8 100644 --- a/lib/nvmf/tcp.c +++ b/lib/nvmf/tcp.c @@ -254,12 +254,23 @@ struct spdk_nvmf_tcp_qpair { TAILQ_ENTRY(spdk_nvmf_tcp_qpair) link; }; +struct spdk_nvmf_tcp_control_msg { + STAILQ_ENTRY(spdk_nvmf_tcp_control_msg) link; +}; + +struct spdk_nvmf_tcp_control_msg_list { + void *msg_buf; + STAILQ_HEAD(, spdk_nvmf_tcp_control_msg) free_msgs; +}; + struct spdk_nvmf_tcp_poll_group { struct spdk_nvmf_transport_poll_group group; struct spdk_sock_group *sock_group; TAILQ_HEAD(, spdk_nvmf_tcp_qpair) qpairs; TAILQ_HEAD(, spdk_nvmf_tcp_qpair) await_req; + + struct spdk_nvmf_tcp_control_msg_list *control_msg_list; }; struct spdk_nvmf_tcp_port { @@ -295,6 +306,7 @@ static const struct spdk_json_object_decoder tcp_transport_opts_decoder[] = { static bool nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, struct spdk_nvmf_tcp_req *tcp_req); +static void nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group); static void nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req, @@ -1001,6 +1013,49 @@ nvmf_tcp_discover(struct spdk_nvmf_transport *transport, entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE; } +static struct spdk_nvmf_tcp_control_msg_list * +nvmf_tcp_control_msg_list_create(uint16_t num_messages) +{ + struct spdk_nvmf_tcp_control_msg_list *list; + struct spdk_nvmf_tcp_control_msg *msg; + uint16_t i; + + list = calloc(1, sizeof(*list)); + if (!list) { + SPDK_ERRLOG("Failed to allocate memory for list structure\n"); + return NULL; + } + + list->msg_buf = spdk_zmalloc(num_messages * SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE, + NVMF_DATA_BUFFER_ALIGNMENT, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA); + if (!list->msg_buf) { + SPDK_ERRLOG("Failed to allocate memory for control message buffers\n"); + free(list); + return NULL; + } + + STAILQ_INIT(&list->free_msgs); + + for (i = 0; i < num_messages; i++) { + msg = (struct spdk_nvmf_tcp_control_msg *)((char *)list->msg_buf + i * + SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE); + STAILQ_INSERT_TAIL(&list->free_msgs, msg, link); + } + + return list; +} + +static void +nvmf_tcp_control_msg_list_free(struct spdk_nvmf_tcp_control_msg_list *list) +{ + if (!list) { + return; + } + + spdk_free(list->msg_buf); + free(list); +} + static struct spdk_nvmf_transport_poll_group * nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport) { @@ -1019,10 +1074,20 @@ nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport) TAILQ_INIT(&tgroup->qpairs); TAILQ_INIT(&tgroup->await_req); + if (transport->opts.in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) { + SPDK_DEBUGLOG(nvmf_tcp, "ICD %u is less than min required for admin/fabric commands (%u). " + "Creating control messages list\n", transport->opts.in_capsule_data_size, + SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE); + tgroup->control_msg_list = nvmf_tcp_control_msg_list_create(32); + if (!tgroup->control_msg_list) { + goto cleanup; + } + } + return &tgroup->group; cleanup: - free(tgroup); + nvmf_tcp_poll_group_destroy(&tgroup->group); return NULL; } @@ -1049,6 +1114,9 @@ nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group) tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); spdk_sock_group_close(&tgroup->sock_group); + if (tgroup->control_msg_list) { + nvmf_tcp_control_msg_list_free(tgroup->control_msg_list); + } free(tgroup); } @@ -1867,6 +1935,31 @@ nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair) return rc; } +static inline void * +nvmf_tcp_control_msg_get(struct spdk_nvmf_tcp_control_msg_list *list) +{ + struct spdk_nvmf_tcp_control_msg *msg; + + assert(list); + + msg = STAILQ_FIRST(&list->free_msgs); + if (!msg) { + SPDK_DEBUGLOG(nvmf_tcp, "Out of control messages\n"); + return NULL; + } + STAILQ_REMOVE_HEAD(&list->free_msgs, link); + return msg; +} + +static inline void +nvmf_tcp_control_msg_put(struct spdk_nvmf_tcp_control_msg_list *list, void *_msg) +{ + struct spdk_nvmf_tcp_control_msg *msg = _msg; + + assert(list); + STAILQ_INSERT_HEAD(&list->free_msgs, msg, link); +} + static int nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req, struct spdk_nvmf_transport *transport, @@ -1876,6 +1969,7 @@ nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req, struct spdk_nvme_cmd *cmd; struct spdk_nvme_cpl *rsp; struct spdk_nvme_sgl_descriptor *sgl; + struct spdk_nvmf_tcp_poll_group *tgroup; uint32_t length; cmd = &req->cmd->nvme_cmd; @@ -1922,6 +2016,7 @@ nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req, sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) { uint64_t offset = sgl->address; uint32_t max_len = transport->opts.in_capsule_data_size; + assert(tcp_req->has_incapsule_data); SPDK_DEBUGLOG(nvmf_tcp, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n", offset, length); @@ -1934,16 +2029,33 @@ nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req, } max_len -= (uint32_t)offset; - if (length > max_len) { - SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", - length, max_len); - rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; - return -1; + if (spdk_unlikely(length > max_len)) { + /* According to the SPEC we should support ICD up to 8192 bytes for admin and fabric commands */ + if (length <= SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE && + (cmd->opc == SPDK_NVME_OPC_FABRIC || req->qpair->qid == 0)) { + + /* Get a buffer from dedicated list */ + SPDK_DEBUGLOG(nvmf_tcp, "Getting a buffer from control msg list\n"); + tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); + assert(tgroup->control_msg_list); + req->data = nvmf_tcp_control_msg_get(tgroup->control_msg_list); + if (!req->data) { + /* No available buffers. Queue this request up. */ + SPDK_DEBUGLOG(nvmf_tcp, "No available ICD buffers. Queueing request %p\n", tcp_req); + return 0; + } + } else { + SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n", + length, max_len); + rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID; + return -1; + } + } else { + req->data = tcp_req->buf; } - req->data = tcp_req->buf + offset; - req->data_from_pool = false; req->length = length; + req->data_from_pool = false; if (spdk_unlikely(req->dif.dif_insert_or_strip)) { length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx); @@ -2130,6 +2242,7 @@ nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, bool progress = false; struct spdk_nvmf_transport *transport = &ttransport->transport; struct spdk_nvmf_transport_poll_group *group; + struct spdk_nvmf_tcp_poll_group *tgroup; tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair); group = &tqpair->group->group; @@ -2301,6 +2414,12 @@ nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport, spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0); if (tcp_req->req.data_from_pool) { spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport); + } else if (spdk_unlikely(tcp_req->has_incapsule_data && (tcp_req->cmd.opc == SPDK_NVME_OPC_FABRIC || + tqpair->qpair.qid == 0) && tcp_req->req.length > transport->opts.in_capsule_data_size)) { + tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); + assert(tgroup->control_msg_list); + SPDK_DEBUGLOG(nvmf_tcp, "Put buf to control msg list\n"); + nvmf_tcp_control_msg_put(tgroup->control_msg_list, tcp_req->req.data); } tcp_req->req.length = 0; tcp_req->req.iovcnt = 0; diff --git a/test/unit/lib/nvmf/tcp.c/tcp_ut.c b/test/unit/lib/nvmf/tcp.c/tcp_ut.c index a8a22f1cc..80869967d 100644 --- a/test/unit/lib/nvmf/tcp.c/tcp_ut.c +++ b/test/unit/lib/nvmf/tcp.c/tcp_ut.c @@ -459,6 +459,7 @@ test_nvmf_tcp_poll_group_create(void) { struct spdk_nvmf_transport *transport; struct spdk_nvmf_transport_poll_group *group; + struct spdk_nvmf_tcp_poll_group *tgroup; struct spdk_thread *thread; struct spdk_nvmf_transport_opts opts; struct spdk_sock_group grp = {}; @@ -482,6 +483,10 @@ test_nvmf_tcp_poll_group_create(void) group = nvmf_tcp_poll_group_create(transport); MOCK_CLEAR_P(spdk_sock_group_create); SPDK_CU_ASSERT_FATAL(group); + if (opts.in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) { + tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group); + SPDK_CU_ASSERT_FATAL(tgroup->control_msg_list); + } group->transport = transport; nvmf_tcp_poll_group_destroy(group); nvmf_tcp_destroy(transport);