From 3c88819bc0bd30760f623f0dbfd5873054038f91 Mon Sep 17 00:00:00 2001
From: Ziye Yang
Date: Fri, 7 Dec 2018 05:48:51 +0800
Subject: [PATCH] nvmf/tcp: Use the common buffer cache for each polling group

Purpose: Avoid buffer contention among different polling groups when
multiple cores are configured for the NVMe-oF TCP transport.

Change-Id: I1c1b0126f3aad28f339ec8bf9355282e08cfa8db
Signed-off-by: Ziye Yang
Reviewed-on: https://review.gerrithub.io/c/440444
Tested-by: SPDK CI Jenkins
Reviewed-by: Seth Howell
Reviewed-by: Ben Walker
Reviewed-by: Jim Harris
---
 lib/nvmf/tcp.c                    | 59 +++++++++++++++++++++----------
 test/unit/lib/nvmf/tcp.c/tcp_ut.c | 24 ++++++++++---
 2 files changed, 61 insertions(+), 22 deletions(-)

diff --git a/lib/nvmf/tcp.c b/lib/nvmf/tcp.c
index 5dc9766de..faece743c 100644
--- a/lib/nvmf/tcp.c
+++ b/lib/nvmf/tcp.c
@@ -297,6 +297,10 @@ struct spdk_nvmf_tcp_transport {
 struct spdk_nvmf_tcp_mgmt_channel {
 	/* Requests that are waiting to obtain a data buffer */
 	TAILQ_HEAD(, nvme_tcp_req)	pending_data_buf_queue;
+
+	/* Point to the transport polling group */
+	struct spdk_nvmf_tcp_poll_group	*tgroup;
+
 };
 
 static void spdk_nvmf_tcp_qpair_process_pending(struct spdk_nvmf_tcp_transport *ttransport,
@@ -1301,6 +1305,7 @@ spdk_nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
 	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
 	spdk_sock_group_close(&tgroup->sock_group);
 	spdk_poller_unregister(&tgroup->timeout_poller);
+
 	free(tgroup);
 }
 
@@ -2150,6 +2155,26 @@ spdk_nvmf_tcp_req_get_xfer(struct nvme_tcp_req *tcp_req) {
 	return xfer;
 }
 
+static void
+spdk_nvmf_tcp_request_free_buffers(struct nvme_tcp_req *tcp_req,
+				   struct spdk_nvmf_transport_poll_group *group, struct spdk_nvmf_transport *transport)
+{
+	for (uint32_t i = 0; i < tcp_req->req.iovcnt; i++) {
+		assert(tcp_req->buffers[i] != NULL);
+		if (group->buf_cache_count < group->buf_cache_size) {
+			STAILQ_INSERT_HEAD(&group->buf_cache,
+					   (struct spdk_nvmf_transport_pg_cache_buf *)tcp_req->buffers[i], link);
+			group->buf_cache_count++;
+		} else {
+			spdk_mempool_put(transport->data_buf_pool, tcp_req->buffers[i]);
+		}
+		tcp_req->req.iov[i].iov_base = NULL;
+		tcp_req->buffers[i] = NULL;
+		tcp_req->req.iov[i].iov_len = 0;
+	}
+	tcp_req->data_from_pool = false;
+}
+
 static int
 spdk_nvmf_tcp_req_fill_iovs(struct spdk_nvmf_tcp_transport *ttransport,
 			    struct nvme_tcp_req *tcp_req)
@@ -2157,12 +2182,20 @@ spdk_nvmf_tcp_req_fill_iovs(struct spdk_nvmf_tcp_transport *ttransport,
 	void *buf = NULL;
 	uint32_t length = tcp_req->req.length;
 	uint32_t i = 0;
+	struct nvme_tcp_qpair *tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct nvme_tcp_qpair, qpair);
+	struct spdk_nvmf_transport_poll_group *group = &tqpair->ch->tgroup->group;
 
 	tcp_req->req.iovcnt = 0;
 	while (length) {
-		buf = spdk_mempool_get(ttransport->transport.data_buf_pool);
-		if (!buf) {
-			goto nomem;
+		if (!(STAILQ_EMPTY(&group->buf_cache))) {
+			group->buf_cache_count--;
+			buf = STAILQ_FIRST(&group->buf_cache);
+			STAILQ_REMOVE_HEAD(&group->buf_cache, link);
+		} else {
+			buf = spdk_mempool_get(ttransport->transport.data_buf_pool);
+			if (!buf) {
+				goto nomem;
+			}
 		}
 
 		tcp_req->req.iov[i].iov_base = (void *)((uintptr_t)(buf + NVMF_DATA_BUFFER_MASK) &
@@ -2175,17 +2208,10 @@ spdk_nvmf_tcp_req_fill_iovs(struct spdk_nvmf_tcp_transport *ttransport,
 	}
 
 	tcp_req->data_from_pool = true;
-
 	return 0;
 
 nomem:
-	while (i) {
-		i--;
-		spdk_mempool_put(ttransport->transport.data_buf_pool, tcp_req->buffers[i]);
-		tcp_req->req.iov[i].iov_base = NULL;
-		tcp_req->req.iov[i].iov_len = 0;
-
-	}
+	spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport);
 	tcp_req->req.iovcnt = 0;
 	return -ENOMEM;
 }
@@ -2465,8 +2491,10 @@ spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
 	int rc;
 	enum spdk_nvmf_tcp_req_state prev_state;
 	bool progress = false;
+	struct spdk_nvmf_transport_poll_group *group;
 
 	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct nvme_tcp_qpair, qpair);
+	group = &tqpair->ch->tgroup->group;
 
 	assert(tcp_req->state != TCP_REQUEST_STATE_FREE);
 	/* The loop here is to allow for several back-to-back state changes. */
@@ -2590,13 +2618,7 @@ spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
 		case TCP_REQUEST_STATE_COMPLETED:
 			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
 			if (tcp_req->data_from_pool) {
-				/* Put the buffer/s back in the pool */
-				for (uint32_t i = 0; i < tcp_req->req.iovcnt; i++) {
-					spdk_mempool_put(ttransport->transport.data_buf_pool, tcp_req->buffers[i]);
-					tcp_req->req.iov[i].iov_base = NULL;
-					tcp_req->buffers[i] = NULL;
-				}
-				tcp_req->data_from_pool = false;
+				spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport);
 			}
 			tcp_req->req.length = 0;
 			tcp_req->req.iovcnt = 0;
@@ -2697,6 +2719,7 @@ spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
 		return -1;
 	}
 
+	tqpair->ch->tgroup = tgroup;
 	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
 	tqpair->timeout = SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT;
 	tqpair->last_pdu_time = spdk_get_ticks();
diff --git a/test/unit/lib/nvmf/tcp.c/tcp_ut.c b/test/unit/lib/nvmf/tcp.c/tcp_ut.c
index 62e4c92b0..ef9745c02 100644
--- a/test/unit/lib/nvmf/tcp.c/tcp_ut.c
+++ b/test/unit/lib/nvmf/tcp.c/tcp_ut.c
@@ -51,6 +51,7 @@
 #define UT_IO_UNIT_SIZE 1024
 #define UT_MAX_AQ_DEPTH 64
 #define UT_SQ_HEAD_MAX 128
+#define UT_NUM_SHARED_BUFFERS 128
 
 SPDK_LOG_REGISTER_COMPONENT("nvmf", SPDK_LOG_NVMF)
 SPDK_LOG_REGISTER_COMPONENT("nvme", SPDK_LOG_NVME)
@@ -216,6 +217,7 @@ test_nvmf_tcp_create(void)
 	opts.max_io_size = UT_MAX_IO_SIZE;
 	opts.io_unit_size = UT_IO_UNIT_SIZE;
 	opts.max_aq_depth = UT_MAX_AQ_DEPTH;
+	opts.num_shared_buffers = UT_NUM_SHARED_BUFFERS;
 	/* expect success */
 	transport = spdk_nvmf_tcp_create(&opts);
 	CU_ASSERT_PTR_NOT_NULL(transport);
@@ -239,6 +241,7 @@ test_nvmf_tcp_create(void)
 	opts.max_io_size = UT_MAX_IO_SIZE;
 	opts.io_unit_size = UT_MAX_IO_SIZE + 1;
 	opts.max_aq_depth = UT_MAX_AQ_DEPTH;
+	opts.num_shared_buffers = UT_NUM_SHARED_BUFFERS;
 	/* expect success */
 	transport = spdk_nvmf_tcp_create(&opts);
 	CU_ASSERT_PTR_NOT_NULL(transport);
@@ -288,6 +291,7 @@ test_nvmf_tcp_destroy(void)
 	opts.max_io_size = UT_MAX_IO_SIZE;
 	opts.io_unit_size = UT_IO_UNIT_SIZE;
 	opts.max_aq_depth = UT_MAX_AQ_DEPTH;
+	opts.num_shared_buffers = UT_NUM_SHARED_BUFFERS;
 	transport = spdk_nvmf_tcp_create(&opts);
 	CU_ASSERT_PTR_NOT_NULL(transport);
 	transport->opts = opts;
@@ -300,17 +304,29 @@
 static void
 test_nvmf_tcp_poll_group_create(void)
 {
-	struct spdk_nvmf_tcp_transport ttransport;
+	struct spdk_nvmf_transport *transport;
 	struct spdk_nvmf_transport_poll_group *group;
 	struct spdk_thread *thread;
+	struct spdk_nvmf_transport_opts opts;
 
 	thread = spdk_thread_create(NULL);
 	SPDK_CU_ASSERT_FATAL(thread != NULL);
 	spdk_set_thread(thread);
 
-	memset(&ttransport, 0, sizeof(ttransport));
-	group = spdk_nvmf_tcp_poll_group_create(&ttransport.transport);
-	CU_ASSERT_PTR_NOT_NULL(group);
+	memset(&opts, 0, sizeof(opts));
+	opts.max_queue_depth = UT_MAX_QUEUE_DEPTH;
+	opts.max_qpairs_per_ctrlr = UT_MAX_QPAIRS_PER_CTRLR;
+	opts.in_capsule_data_size = UT_IN_CAPSULE_DATA_SIZE;
+	opts.max_io_size = UT_MAX_IO_SIZE;
+	opts.io_unit_size = UT_IO_UNIT_SIZE;
+	opts.max_aq_depth = UT_MAX_AQ_DEPTH;
+	opts.num_shared_buffers = UT_NUM_SHARED_BUFFERS;
+	transport = spdk_nvmf_tcp_create(&opts);
+	CU_ASSERT_PTR_NOT_NULL(transport);
+	transport->opts = opts;
+	group = spdk_nvmf_tcp_poll_group_create(transport);
+	SPDK_CU_ASSERT_FATAL(group);
+	group->transport = transport;
 	spdk_nvmf_tcp_poll_group_destroy(group);
 	spdk_thread_exit(thread);
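
The core of the change is a small per-poll-group free list (group->buf_cache, capped at
group->buf_cache_size) placed in front of the shared data_buf_pool: spdk_nvmf_tcp_req_fill_iovs()
takes buffers from the local cache first and only falls back to the shared spdk_mempool, while
spdk_nvmf_tcp_request_free_buffers() refills the local cache before returning anything to the pool,
so pollers on different cores stop hitting the shared pool for every request. The standalone C
sketch below models that pattern with <sys/queue.h>. It is only an illustration under simplified
assumptions (the "shared pool" is reduced to a counter-guarded malloc, and struct cache_buf stands
in for struct spdk_nvmf_transport_pg_cache_buf); it is not the actual SPDK implementation.

/* Simplified model of a per-poll-group buffer cache in front of a shared pool.
 * Illustrative only: the shared pool is a counter plus malloc/free, and
 * cache_buf stands in for struct spdk_nvmf_transport_pg_cache_buf. */
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

#define BUF_SIZE 4096

struct cache_buf {
	STAILQ_ENTRY(cache_buf) link;
};

struct shared_pool {
	unsigned int remaining;		/* buffers left in the shared pool */
};

struct poll_group {
	STAILQ_HEAD(, cache_buf) buf_cache;
	unsigned int buf_cache_count;
	unsigned int buf_cache_size;	/* max buffers kept per group */
};

static void *
group_get_buf(struct poll_group *group, struct shared_pool *pool)
{
	struct cache_buf *buf;

	/* Fast path: reuse a buffer cached by this group, no shared-pool access. */
	if (!STAILQ_EMPTY(&group->buf_cache)) {
		buf = STAILQ_FIRST(&group->buf_cache);
		STAILQ_REMOVE_HEAD(&group->buf_cache, link);
		group->buf_cache_count--;
		return buf;
	}

	/* Slow path: fall back to the shared pool, which all groups contend on. */
	if (pool->remaining == 0) {
		return NULL;
	}
	pool->remaining--;
	return malloc(BUF_SIZE);
}

static void
group_put_buf(struct poll_group *group, struct shared_pool *pool, void *buf)
{
	/* Keep the buffer locally while the cache has room; otherwise give it
	 * back to the shared pool. */
	if (group->buf_cache_count < group->buf_cache_size) {
		STAILQ_INSERT_HEAD(&group->buf_cache, (struct cache_buf *)buf, link);
		group->buf_cache_count++;
	} else {
		free(buf);
		pool->remaining++;
	}
}

int
main(void)
{
	struct shared_pool pool = { .remaining = 128 };
	struct poll_group group = { .buf_cache_count = 0, .buf_cache_size = 32 };
	void *buf;

	STAILQ_INIT(&group.buf_cache);

	buf = group_get_buf(&group, &pool);	/* comes from the shared pool */
	group_put_buf(&group, &pool, buf);	/* cached locally */
	buf = group_get_buf(&group, &pool);	/* served from the group cache */
	group_put_buf(&group, &pool, buf);

	printf("shared pool remaining: %u, cached in group: %u\n",
	       pool.remaining, group.buf_cache_count);
	return 0;
}

As in the patch, a released buffer's own memory is reused as the cache list node, so the cache
costs no extra allocations; buffers only migrate back to the shared pool once the per-group cap
is reached.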