From 31db7b139b6107eb25a31f7b1a124c04ad3afc9f Mon Sep 17 00:00:00 2001 From: zhangduan Date: Mon, 11 Apr 2022 14:36:46 +0800 Subject: [PATCH] nvme_tcp: set transport_ack_timeout to ack_timeout The value of ack_timeout is calculated according to the formula 2^(transport_ack_timeout) msec. Signed-off-by: zhangduan Change-Id: I5a938635d70693ddd405fa5907555bb745b4df0f Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12215 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Shuhei Matsumoto Reviewed-by: Aleksey Marchuk Reviewed-by: Jim Harris --- doc/jsonrpc.md | 2 +- include/spdk/nvme.h | 10 ++++++++-- lib/nvme/nvme_tcp.c | 15 +++++++++++++++ python/spdk/rpc/bdev.py | 2 +- scripts/rpc.py | 2 +- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index 22db68ee0..97ebc29d2 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -2959,7 +2959,7 @@ io_queue_requests | Optional | number | The number of requests all delay_cmd_submit | Optional | boolean | Enable delaying NVMe command submission to allow batching of multiple commands. Default: `true`. transport_retry_count | Optional | number | The number of attempts per I/O in the transport layer before an I/O fails. bdev_retry_count | Optional | number | The number of attempts per I/O in the bdev layer before an I/O fails. -1 means infinite retries. -transport_ack_timeout | Optional | number | Time to wait ack until packet retransmission. RDMA specific. Range 0-31 where 0 is driver-specific default value. +transport_ack_timeout | Optional | number | Time to wait ack until retransmission for RDMA or connection close for TCP. Range 0-31 where 0 means use default. ctrlr_loss_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before deleting ctrlr. -1 means infinite reconnects. 0 means no reconnect. reconnect_delay_sec | Optional | number | Time to delay a reconnect trial. 0 means no reconnect. 
fast_io_fail_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before failing I/O to ctrlr. 0 means no such timeout. diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h index c77ee73f7..88f38b4c3 100644 --- a/include/spdk/nvme.h +++ b/include/spdk/nvme.h @@ -227,9 +227,10 @@ struct spdk_nvme_ctrlr_opts { bool disable_error_logging; /** - * It is used for RDMA transport + * It is used for both RDMA & TCP transport * Specify the transport ACK timeout. The value should be in range 0-31 where 0 means - * use driver-specific default value. The value is applied to each RDMA qpair + * use driver-specific default value. + * RDMA: The value is applied to each qpair * and affects the time that qpair waits for transport layer acknowledgement * until it retransmits a packet. The value should be chosen empirically * to meet the needs of a particular application. A low value means less time @@ -237,6 +238,11 @@ struct spdk_nvme_ctrlr_opts { * A large value can increase the time the connection is closed. * The value of ACK timeout is calculated according to the formula * 4.096 * 2^(transport_ack_timeout) usec. + * TCP: The value is applied to each qpair + * and affects the time that qpair waits for transport layer acknowledgement + * until connection is closed forcefully. + * The value of ACK timeout is calculated according to the formula + * 2^(transport_ack_timeout) msec. 
*/ uint8_t transport_ack_timeout; diff --git a/lib/nvme/nvme_tcp.c b/lib/nvme/nvme_tcp.c index b31d8d28b..3306e6eba 100644 --- a/lib/nvme/nvme_tcp.c +++ b/lib/nvme/nvme_tcp.c @@ -59,6 +59,12 @@ #define NVME_TCP_MAX_R2T_DEFAULT 1 #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096 +/* + * Maximum value of transport_ack_timeout used by TCP controller + */ +#define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT 31 + + /* NVMe TCP transport extensions for spdk_nvme_ctrlr */ struct nvme_tcp_ctrlr { struct spdk_nvme_ctrlr ctrlr; @@ -1926,6 +1932,9 @@ nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpai spdk_sock_get_default_opts(&opts); opts.priority = ctrlr->trid.priority; opts.zcopy = !nvme_qpair_is_admin_queue(qpair); + if (ctrlr->opts.transport_ack_timeout) { + opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout; + } tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, NULL, &opts); if (!tqpair->sock) { SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n", @@ -2119,6 +2128,12 @@ static struct spdk_nvme_ctrlr *nvme_tcp_ctrlr_construct(const struct spdk_nvme_t tctrlr->ctrlr.opts = *opts; tctrlr->ctrlr.trid = *trid; + if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) { + SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n", + NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT); + tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT; + } + rc = nvme_ctrlr_construct(&tctrlr->ctrlr); if (rc != 0) { free(tctrlr); diff --git a/python/spdk/rpc/bdev.py b/python/spdk/rpc/bdev.py index c2f03a47c..abd99f6df 100644 --- a/python/spdk/rpc/bdev.py +++ b/python/spdk/rpc/bdev.py @@ -484,7 +484,7 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo delay_cmd_submit: Enable delayed NVMe command submission to allow batching of multiple commands (optional) transport_retry_count: The number of attempts per I/O in the transport layer when 
an I/O fails (optional) bdev_retry_count: The number of attempts per I/O in the bdev layer when an I/O fails. -1 means infinite retries. (optional) - transport_ack_timeout: Time to wait ack until packet retransmission. RDMA specific. + transport_ack_timeout: Time to wait ack until packet retransmission for RDMA or until the connection is closed for TCP. Range 0-31 where 0 is driver-specific default value (optional) ctrlr_loss_timeout_sec: Time to wait until ctrlr is reconnected before deleting ctrlr. -1 means infinite reconnect retries. 0 means no reconnect retry. diff --git a/scripts/rpc.py b/scripts/rpc.py index 224111ddf..43765d6a3 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -545,7 +545,7 @@ if __name__ == "__main__": p.add_argument('-r', '--bdev-retry-count', help='the number of attempts per I/O in the bdev layer when an I/O fails. -1 means infinite retries.', type=int) p.add_argument('-e', '--transport-ack-timeout', - help="""Time to wait ack until packet retransmission. RDMA specific. + help="""Time to wait ack until packet retransmission for RDMA or until closes connection for TCP. Range 0-31 where 0 is driver-specific default value.""", type=int) p.add_argument('-l', '--ctrlr-loss-timeout-sec', help="""Time to wait until ctrlr is reconnected before deleting ctrlr.