nvmf/tcp: Simplify qpair resource initialization

The resources allocated to a queue pair in NVMe-oF do not need to be
directly correlated to the queue size requested by the initiator, as
long as enough resources are present. The RDMA transport, for instance,
does complex pooling of resources behind the scenes when using a shared
receive queue.

Simplify the resource allocation for a TCP qpair by always allocating
for the maximum allowed queue size right away. This maximum is a
configurable parameter, so system administrators can adjust it for
their needs. The initiator may then request a queue size less than or
equal to that maximum, which is enforced only by queue depth counting
and does not affect the actual number of resources allocated on the
target; see the sketch below.
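
As a minimal sketch (using the names introduced in the diff below, with
error handling omitted), the allocation now reduces to:

    /* Size every per-qpair resource pool to the configured maximum queue
     * depth, regardless of the SQ size the initiator later requests. */
    tqpair->resource_count = opts->max_queue_depth;

    /* One request, one PDU, and one in-capsule buffer per slot. */
    tqpair->reqs = calloc(tqpair->resource_count, sizeof(*tqpair->reqs));
    tqpair->pdus = spdk_dma_malloc(tqpair->resource_count * sizeof(*tqpair->pdus),
                                   0x1000, NULL);
    tqpair->bufs = spdk_zmalloc(tqpair->resource_count * in_capsule_data_size,
                                0x1000, NULL, SPDK_ENV_LCORE_ID_ANY,
                                SPDK_MALLOC_DMA);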

This change relies on MaxC2HSize being equal to the reported Maximum
Data Transfer Size (MDTS). That is the default configuration, but MDTS
is configurable, and with this patch, setting MDTS to a value larger
than 128k will break the target. The next patch in this series
addresses that.
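
To make the 128k limit concrete: the transport caps each C2H Data PDU at
NVMF_TCP_PDU_MAX_C2H_DATA_SIZE (131072 bytes), and the PDU pool now holds
one PDU per request slot. A hypothetical back-of-the-envelope check
(io_size is an illustrative variable, not code from this patch):

    /* Hypothetical illustration: PDUs needed for one C2H transfer of
     * io_size bytes, with each PDU carrying at most 128k of data. */
    uint32_t pdus_needed = (io_size + NVMF_TCP_PDU_MAX_C2H_DATA_SIZE - 1) /
                           NVMF_TCP_PDU_MAX_C2H_DATA_SIZE;
    /* MDTS <= 128k gives pdus_needed == 1, matching the pool sizing;
     * MDTS  > 128k gives pdus_needed  > 1 and can exhaust the pool. */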

Change-Id: Ibd4723785c6a4d8d444f9b7bbfa89f98de2320f5
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/479733
Community-CI: SPDK CI Jenkins <sys_sgci@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Alexey Marchuk <alexeymar@mellanox.com>
commit 4dba507224 (parent 444cf90c72)
Author: Ben Walker, 2020-01-08 12:20:28 -07:00
Committed by: Tomasz Zawadzki
7 changed files with 66 additions and 176 deletions

File 1 of 7:

@@ -250,11 +250,6 @@ struct spdk_nvmf_transport_ops {
 	int (*qpair_get_listen_trid)(struct spdk_nvmf_qpair *qpair,
 				     struct spdk_nvme_transport_id *trid);
 
-	/*
-	 * set the submission queue size of the queue pair
-	 */
-	int (*qpair_set_sqsize)(struct spdk_nvmf_qpair *qpair);
-
 	/*
 	 * Get transport poll group statistics
 	 */

File 2 of 7:

@@ -593,12 +593,6 @@ spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
 	qpair->sq_head_max = cmd->sqsize;
 	qpair->qid = cmd->qid;
 
-	if (spdk_nvmf_transport_qpair_set_sqsize(qpair)) {
-		SPDK_ERRLOG("Can not create SQSIZE %u for qpair=%p\n", cmd->sqsize, qpair);
-		rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
-		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
-	}
-
 	if (0 == qpair->qid) {
 		qpair->group->stat.admin_qpairs++;
 	} else {
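
With the set_sqsize hook gone, the connect path above only records the
requested size in sq_head_max; depth is then enforced by counting
outstanding requests. A hypothetical sketch of that accounting
(outstanding_reqs is an illustrative field name, not SPDK's actual
bookkeeping):

    /* Hypothetical sketch: admit a new command only while the count of
     * outstanding requests is below the initiator's requested SQ size. */
    if (qpair->outstanding_reqs >= qpair->sq_head_max) {
        return; /* queue appears full to the initiator */
    }
    qpair->outstanding_reqs++;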

File 3 of 7:

@@ -54,7 +54,6 @@
 #define NVMF_TCP_PDU_MAX_H2C_DATA_SIZE	131072
 #define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE	131072
-#define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM  64  /* Maximal c2h_data pdu number for ecah tqpair */
 #define SPDK_NVMF_TCP_DEFAULT_MAX_SOCK_PRIORITY 6
 #define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR 4
@@ -216,8 +215,13 @@ struct spdk_nvmf_tcp_qpair {
 	TAILQ_HEAD(, nvme_tcp_pdu)	send_queue;
 	TAILQ_HEAD(, nvme_tcp_pdu)	free_queue;
-	struct nvme_tcp_pdu		*pdu;
-	struct nvme_tcp_pdu		*pdu_pool;
+
+	/* Arrays of in-capsule buffers, requests, and pdus.
+	 * Each array is 'resource_count' number of elements */
+	void				*bufs;
+	struct spdk_nvmf_tcp_req	*reqs;
+	struct nvme_tcp_pdu		*pdus;
+	uint32_t			resource_count;
+
 	uint16_t			free_pdu_num;
 
 	/* Queues to track the requests in all states */
@@ -229,22 +233,9 @@ struct spdk_nvmf_tcp_qpair {
 	uint8_t				cpda;
 
-	/* Array of size "max_queue_depth * InCapsuleDataSize" containing
-	 * buffers to be used for in capsule data.
-	 */
-	void				*buf;
-	void				*bufs;
-	struct spdk_nvmf_tcp_req	*req;
-	struct spdk_nvmf_tcp_req	*reqs;
-
 	bool				host_hdgst_enable;
 	bool				host_ddgst_enable;
 
-	/* The maximum number of I/O outstanding on this connection at one time */
-	uint16_t			max_queue_depth;
-
 	/** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
 	uint32_t			maxh2cdata;
@@ -449,17 +440,16 @@ spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
 	assert(err == 0);
 
 	spdk_nvmf_tcp_cleanup_all_states(tqpair);
 
-	if (tqpair->free_pdu_num != (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
+	if (tqpair->free_pdu_num != tqpair->resource_count) {
 		SPDK_ERRLOG("tqpair(%p) free pdu pool num is %u but should be %u\n", tqpair,
-			    tqpair->free_pdu_num,
-			    (tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM));
+			    tqpair->free_pdu_num, tqpair->resource_count);
 		err++;
 	}
 
-	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->max_queue_depth) {
+	if (tqpair->state_cntr[TCP_REQUEST_STATE_FREE] != tqpair->resource_count) {
 		SPDK_ERRLOG("tqpair(%p) free tcp request num is %u but should be %u\n", tqpair,
 			    tqpair->state_cntr[TCP_REQUEST_STATE_FREE],
-			    tqpair->max_queue_depth);
+			    tqpair->resource_count);
 		err++;
 	}
@@ -472,11 +462,9 @@ spdk_nvmf_tcp_qpair_destroy(struct spdk_nvmf_tcp_qpair *tqpair)
 	if (err > 0) {
 		nvmf_tcp_dump_qpair_req_contents(tqpair);
 	}
-	spdk_dma_free(tqpair->pdu);
-	free(tqpair->pdu_pool);
-	free(tqpair->req);
+
+	spdk_dma_free(tqpair->pdus);
 	free(tqpair->reqs);
-	spdk_free(tqpair->buf);
 	spdk_free(tqpair->bufs);
 	free(tqpair->pdu_recv_buf.buf);
 	free(tqpair);
@@ -852,81 +840,29 @@ spdk_nvmf_tcp_qpair_write_pdu(struct spdk_nvmf_tcp_qpair *tqpair,
 }
 
 static int
-spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16_t size)
+spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair)
 {
-	int i;
-	struct spdk_nvmf_tcp_req *tcp_req;
-	struct spdk_nvmf_transport *transport = tqpair->qpair.transport;
+	uint32_t i;
+	struct spdk_nvmf_transport_opts *opts;
 	uint32_t in_capsule_data_size;
 
-	in_capsule_data_size = transport->opts.in_capsule_data_size;
-	if (transport->opts.dif_insert_or_strip) {
+	opts = &tqpair->qpair.transport->opts;
+
+	in_capsule_data_size = opts->in_capsule_data_size;
+	if (opts->dif_insert_or_strip) {
 		in_capsule_data_size = SPDK_BDEV_BUF_SIZE_WITH_MD(in_capsule_data_size);
 	}
 
-	if (!tqpair->qpair.sq_head_max) {
-		tqpair->req = calloc(1, sizeof(*tqpair->req));
-		if (!tqpair->req) {
-			SPDK_ERRLOG("Unable to allocate req on tqpair=%p.\n", tqpair);
-			return -1;
-		}
+	tqpair->resource_count = opts->max_queue_depth;
 
-		if (in_capsule_data_size) {
-			tqpair->buf = spdk_zmalloc(in_capsule_data_size, 0x1000,
-						   NULL, SPDK_ENV_LCORE_ID_ANY,
-						   SPDK_MALLOC_DMA);
-			if (!tqpair->buf) {
-				SPDK_ERRLOG("Unable to allocate buf on tqpair=%p.\n", tqpair);
-				return -1;
-			}
-		}
-
-		tcp_req = tqpair->req;
-		tcp_req->ttag = 0;
-		tcp_req->req.qpair = &tqpair->qpair;
-
-		/* Set up memory to receive commands */
-		if (tqpair->buf) {
-			tcp_req->buf = tqpair->buf;
-		}
-
-		/* Set the cmdn and rsp */
-		tcp_req->req.rsp = (union nvmf_c2h_msg *)&tcp_req->rsp;
-		tcp_req->req.cmd = (union nvmf_h2c_msg *)&tcp_req->cmd;
-
-		/* Initialize request state to FREE */
-		tcp_req->state = TCP_REQUEST_STATE_FREE;
-		TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
-
-		tqpair->pdu = spdk_dma_malloc((NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM + 1) * sizeof(*tqpair->pdu), 0x1000,
-					      NULL);
-		if (!tqpair->pdu) {
-			SPDK_ERRLOG("Unable to allocate pdu on tqpair=%p.\n", tqpair);
-			return -1;
-		}
-
-		for (i = 0; i < 1 + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM; i++) {
-			TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu[i], tailq);
-		}
-
-		tqpair->pdu_recv_buf.size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
-					     SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
-		tqpair->pdu_recv_buf.buf = calloc(1, tqpair->pdu_recv_buf.size);
-		if (!tqpair->pdu_recv_buf.buf) {
-			SPDK_ERRLOG("Unable to allocate the pdu recv buf on tqpair=%p with size=%d\n", tqpair,
-				    tqpair->pdu_recv_buf.size);
-			return -1;
-		}
-		tqpair->pdu_in_progress.hdr = (union nvme_tcp_pdu_hdr *)tqpair->pdu_recv_buf.buf;
-	} else {
-		tqpair->reqs = calloc(size, sizeof(*tqpair->reqs));
+	tqpair->reqs = calloc(tqpair->resource_count, sizeof(*tqpair->reqs));
 	if (!tqpair->reqs) {
 		SPDK_ERRLOG("Unable to allocate reqs on tqpair=%p\n", tqpair);
 		return -1;
 	}
 
 	if (in_capsule_data_size) {
-		tqpair->bufs = spdk_zmalloc(size * in_capsule_data_size, 0x1000,
+		tqpair->bufs = spdk_zmalloc(tqpair->resource_count * in_capsule_data_size, 0x1000,
 					    NULL, SPDK_ENV_LCORE_ID_ANY,
 					    SPDK_MALLOC_DMA);
 		if (!tqpair->bufs) {
@@ -935,7 +871,7 @@ spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16
 		}
 	}
 
-	for (i = 0; i < size; i++) {
+	for (i = 0; i < tqpair->resource_count; i++) {
 		struct spdk_nvmf_tcp_req *tcp_req = &tqpair->reqs[i];
 
 		tcp_req->ttag = i + 1;
@@ -953,18 +889,29 @@ spdk_nvmf_tcp_qpair_init_mem_resource(struct spdk_nvmf_tcp_qpair *tqpair, uint16
 		/* Initialize request state to FREE */
 		tcp_req->state = TCP_REQUEST_STATE_FREE;
 		TAILQ_INSERT_TAIL(&tqpair->state_queue[tcp_req->state], tcp_req, state_link);
+		tqpair->state_cntr[TCP_REQUEST_STATE_FREE]++;
 	}
 
-	tqpair->pdu_pool = calloc(size, sizeof(*tqpair->pdu_pool));
-	if (!tqpair->pdu_pool) {
+	tqpair->pdus = spdk_dma_malloc(tqpair->resource_count * sizeof(*tqpair->pdus), 0x1000, NULL);
+	if (!tqpair->pdus) {
 		SPDK_ERRLOG("Unable to allocate pdu pool on tqpair =%p.\n", tqpair);
 		return -1;
 	}
 
-	for (i = 0; i < size; i++) {
-		TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdu_pool[i], tailq);
+	for (i = 0; i < tqpair->resource_count; i++) {
+		TAILQ_INSERT_TAIL(&tqpair->free_queue, &tqpair->pdus[i], tailq);
+		tqpair->free_pdu_num++;
 	}
 
+	tqpair->pdu_recv_buf.size = (in_capsule_data_size + sizeof(struct spdk_nvme_tcp_cmd) + 2 *
+				     SPDK_NVME_TCP_DIGEST_LEN) * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR;
+	tqpair->pdu_recv_buf.buf = calloc(1, tqpair->pdu_recv_buf.size);
+	if (!tqpair->pdu_recv_buf.buf) {
+		SPDK_ERRLOG("Unable to allocate the pdu recv buf on tqpair=%p with size=%d\n", tqpair,
+			    tqpair->pdu_recv_buf.size);
+		return -1;
+	}
+	tqpair->pdu_in_progress.hdr = (union nvme_tcp_pdu_hdr *)tqpair->pdu_recv_buf.buf;
+
 	return 0;
 }
@@ -1037,9 +984,8 @@ _spdk_nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
 	}
 
 	tqpair->sock = sock;
-	tqpair->max_queue_depth = 1;
-	tqpair->free_pdu_num = tqpair->max_queue_depth + NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM;
-	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = tqpair->max_queue_depth;
+	tqpair->free_pdu_num = 0;
+	tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = 0;
 	tqpair->port = port;
 	tqpair->qpair.transport = transport;
@@ -2244,8 +2190,7 @@ spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair)
 {
 	struct spdk_nvmf_tcp_req *tcp_req;
 
-	while (!STAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req) &&
-	       (tqpair->c2h_data_pdu_cnt < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM)) {
+	while (!STAILQ_EMPTY(&tqpair->queued_c2h_data_tcp_req) && tqpair->free_pdu_num > 0) {
 		tcp_req = STAILQ_FIRST(&tqpair->queued_c2h_data_tcp_req);
 		spdk_nvmf_tcp_send_c2h_data(tqpair, tcp_req);
 	}
@@ -2257,8 +2202,6 @@ spdk_nvmf_tcp_queue_c2h_data(struct spdk_nvmf_tcp_req *tcp_req,
 {
 	tcp_req->c2h_data_pdu_num = spdk_nvmf_tcp_calc_c2h_data_pdu_num(tcp_req);
 
-	assert(tcp_req->c2h_data_pdu_num < NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM);
-
 	STAILQ_INSERT_TAIL(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
 
 	spdk_nvmf_tcp_handle_pending_c2h_data_queue(tqpair);
 }
@@ -2549,7 +2492,7 @@ spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
 		return -1;
 	}
 
-	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, 1);
+	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair);
 	if (rc < 0) {
 		SPDK_ERRLOG("Cannot init memory resource info for tqpair=%p\n", tqpair);
 		return -1;
@@ -2695,26 +2638,6 @@ spdk_nvmf_tcp_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
 	return spdk_nvmf_tcp_qpair_get_trid(qpair, trid, 0);
 }
 
-static int
-spdk_nvmf_tcp_qpair_set_sq_size(struct spdk_nvmf_qpair *qpair)
-{
-	struct spdk_nvmf_tcp_qpair *tqpair;
-	int rc;
-
-	tqpair = SPDK_CONTAINEROF(qpair, struct spdk_nvmf_tcp_qpair, qpair);
-	rc = spdk_nvmf_tcp_qpair_init_mem_resource(tqpair, tqpair->qpair.sq_head_max);
-	if (!rc) {
-		tqpair->max_queue_depth += tqpair->qpair.sq_head_max;
-		tqpair->free_pdu_num += tqpair->qpair.sq_head_max;
-		tqpair->state_cntr[TCP_REQUEST_STATE_FREE] += tqpair->qpair.sq_head_max;
-		SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "The queue depth=%u for tqpair=%p\n",
-			      tqpair->max_queue_depth, tqpair);
-	}
-
-	return rc;
-}
-
 #define SPDK_NVMF_TCP_DEFAULT_MAX_QUEUE_DEPTH 128
 #define SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH 128
 #define SPDK_NVMF_TCP_DEFAULT_MAX_QPAIRS_PER_CTRLR 128
@@ -2770,7 +2693,6 @@ const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
 	.qpair_get_local_trid = spdk_nvmf_tcp_qpair_get_local_trid,
 	.qpair_get_peer_trid = spdk_nvmf_tcp_qpair_get_peer_trid,
 	.qpair_get_listen_trid = spdk_nvmf_tcp_qpair_get_listen_trid,
-	.qpair_set_sqsize = spdk_nvmf_tcp_qpair_set_sq_size,
 };
 
 SPDK_NVMF_TRANSPORT_REGISTER(tcp, &spdk_nvmf_transport_tcp);

File 4 of 7:

@@ -358,16 +358,6 @@ spdk_nvmf_transport_opts_init(const char *transport_name,
 	return true;
 }
 
-int
-spdk_nvmf_transport_qpair_set_sqsize(struct spdk_nvmf_qpair *qpair)
-{
-	if (qpair->transport->ops->qpair_set_sqsize) {
-		return qpair->transport->ops->qpair_set_sqsize(qpair);
-	}
-
-	return 0;
-}
-
 int
 spdk_nvmf_transport_poll_group_get_stat(struct spdk_nvmf_tgt *tgt,
 					struct spdk_nvmf_transport *transport,

File 5 of 7:

@@ -89,7 +89,6 @@ int spdk_nvmf_transport_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
 int spdk_nvmf_transport_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
 		struct spdk_nvme_transport_id *trid);
 
-int spdk_nvmf_transport_qpair_set_sqsize(struct spdk_nvmf_qpair *qpair);
-
 bool spdk_nvmf_transport_opts_init(const char *transport_name,
 				   struct spdk_nvmf_transport_opts *opts);

File 6 of 7:

@@ -119,11 +119,6 @@ DEFINE_STUB(spdk_nvmf_subsystem_listener_allowed,
 	    (struct spdk_nvmf_subsystem *subsystem, struct spdk_nvme_transport_id *trid),
 	    true);
 
-DEFINE_STUB(spdk_nvmf_transport_qpair_set_sqsize,
-	    int,
-	    (struct spdk_nvmf_qpair *qpair),
-	    0);
-
 DEFINE_STUB(spdk_nvmf_bdev_ctrlr_read_cmd,
 	    int,
 	    (struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,

File 7 of 7:

@@ -84,11 +84,6 @@ DEFINE_STUB(spdk_nvmf_subsystem_listener_allowed,
 	    (struct spdk_nvmf_subsystem *subsystem, struct spdk_nvme_transport_id *trid),
 	    true);
 
-DEFINE_STUB(spdk_nvmf_transport_qpair_set_sqsize,
-	    int,
-	    (struct spdk_nvmf_qpair *qpair),
-	    0);
-
 DEFINE_STUB_V(spdk_nvmf_get_discovery_log_page,
 	      (struct spdk_nvmf_tgt *tgt, const char *hostnqn, struct iovec *iov,
 	       uint32_t iovcnt, uint64_t offset, uint32_t length));