nvme_tcp: set transport_ack_timeout to ack_timeout

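For the TCP transport, the socket ack_timeout is calculated according to
the formula 2^(transport_ack_timeout) msec. A transport_ack_timeout of 0
leaves the socket's default ack_timeout unchanged.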

Signed-off-by: zhangduan <zhangd28@chinatelecom.cn>
Change-Id: I5a938635d70693ddd405fa5907555bb745b4df0f
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12215
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
zhangduan 2022-04-11 14:36:46 +08:00 committed by Tomasz Zawadzki
parent b37fe43d55
commit 31db7b139b
5 changed files with 26 additions and 5 deletions

View File

@ -2959,7 +2959,7 @@ io_queue_requests | Optional | number | The number of requests all
delay_cmd_submit | Optional | boolean | Enable delaying NVMe command submission to allow batching of multiple commands. Default: `true`. delay_cmd_submit | Optional | boolean | Enable delaying NVMe command submission to allow batching of multiple commands. Default: `true`.
transport_retry_count | Optional | number | The number of attempts per I/O in the transport layer before an I/O fails. transport_retry_count | Optional | number | The number of attempts per I/O in the transport layer before an I/O fails.
bdev_retry_count | Optional | number | The number of attempts per I/O in the bdev layer before an I/O fails. -1 means infinite retries. bdev_retry_count | Optional | number | The number of attempts per I/O in the bdev layer before an I/O fails. -1 means infinite retries.
transport_ack_timeout | Optional | number | Time to wait ack until packet retransmission. RDMA specific. Range 0-31 where 0 is driver-specific default value. transport_ack_timeout | Optional | number | Time to wait for an ack before packet retransmission (RDMA) or before the connection is closed (TCP). Range 0-31 where 0 means use the default.
ctrlr_loss_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before deleting ctrlr. -1 means infinite reconnects. 0 means no reconnect. ctrlr_loss_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before deleting ctrlr. -1 means infinite reconnects. 0 means no reconnect.
reconnect_delay_sec | Optional | number | Time to delay a reconnect trial. 0 means no reconnect. reconnect_delay_sec | Optional | number | Time to delay a reconnect trial. 0 means no reconnect.
fast_io_fail_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before failing I/O to ctrlr. 0 means no such timeout. fast_io_fail_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before failing I/O to ctrlr. 0 means no such timeout.

View File

@ -227,9 +227,10 @@ struct spdk_nvme_ctrlr_opts {
bool disable_error_logging; bool disable_error_logging;
/** /**
* It is used for RDMA transport * It is used for both RDMA & TCP transport
* Specify the transport ACK timeout. The value should be in range 0-31 where 0 means * Specify the transport ACK timeout. The value should be in range 0-31 where 0 means
* use driver-specific default value. The value is applied to each RDMA qpair * use driver-specific default value.
* RDMA: The value is applied to each qpair
* and affects the time that qpair waits for transport layer acknowledgement * and affects the time that qpair waits for transport layer acknowledgement
* until it retransmits a packet. The value should be chosen empirically * until it retransmits a packet. The value should be chosen empirically
* to meet the needs of a particular application. A low value means less time * to meet the needs of a particular application. A low value means less time
@ -237,6 +238,11 @@ struct spdk_nvme_ctrlr_opts {
* A large value can increase the time the connection is closed. * A large value can increase the time the connection is closed.
* The value of ACK timeout is calculated according to the formula * The value of ACK timeout is calculated according to the formula
* 4.096 * 2^(transport_ack_timeout) usec. * 4.096 * 2^(transport_ack_timeout) usec.
* TCP: The value is applied to each qpair
* and affects the time that qpair waits for transport layer acknowledgement
* until connection is closed forcefully.
* The value of ACK timeout is calculated according to the formula
* 2^(transport_ack_timeout) msec.
*/ */
uint8_t transport_ack_timeout; uint8_t transport_ack_timeout;

View File

@ -59,6 +59,12 @@
#define NVME_TCP_MAX_R2T_DEFAULT 1 #define NVME_TCP_MAX_R2T_DEFAULT 1
#define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096 #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE 4096
/*
 * Maximum value of transport_ack_timeout accepted by the TCP controller.
 *
 * nvme_tcp_ctrlr_construct() replaces any larger requested value with this
 * limit (logging a notice), and nvme_tcp_qpair_connect_sock() uses the
 * resulting value as a shift count: ack_timeout = 1ULL << transport_ack_timeout.
 */
#define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT 31
/* NVMe TCP transport extensions for spdk_nvme_ctrlr */ /* NVMe TCP transport extensions for spdk_nvme_ctrlr */
struct nvme_tcp_ctrlr { struct nvme_tcp_ctrlr {
struct spdk_nvme_ctrlr ctrlr; struct spdk_nvme_ctrlr ctrlr;
@ -1926,6 +1932,9 @@ nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpai
spdk_sock_get_default_opts(&opts); spdk_sock_get_default_opts(&opts);
opts.priority = ctrlr->trid.priority; opts.priority = ctrlr->trid.priority;
opts.zcopy = !nvme_qpair_is_admin_queue(qpair); opts.zcopy = !nvme_qpair_is_admin_queue(qpair);
if (ctrlr->opts.transport_ack_timeout) {
opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout;
}
tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, NULL, &opts); tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, NULL, &opts);
if (!tqpair->sock) { if (!tqpair->sock) {
SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n", SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n",
@ -2119,6 +2128,12 @@ static struct spdk_nvme_ctrlr *nvme_tcp_ctrlr_construct(const struct spdk_nvme_t
tctrlr->ctrlr.opts = *opts; tctrlr->ctrlr.opts = *opts;
tctrlr->ctrlr.trid = *trid; tctrlr->ctrlr.trid = *trid;
if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) {
SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n",
NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT);
tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT;
}
rc = nvme_ctrlr_construct(&tctrlr->ctrlr); rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
if (rc != 0) { if (rc != 0) {
free(tctrlr); free(tctrlr);

View File

@ -484,7 +484,7 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo
delay_cmd_submit: Enable delayed NVMe command submission to allow batching of multiple commands (optional) delay_cmd_submit: Enable delayed NVMe command submission to allow batching of multiple commands (optional)
transport_retry_count: The number of attempts per I/O in the transport layer when an I/O fails (optional) transport_retry_count: The number of attempts per I/O in the transport layer when an I/O fails (optional)
bdev_retry_count: The number of attempts per I/O in the bdev layer when an I/O fails. -1 means infinite retries. (optional) bdev_retry_count: The number of attempts per I/O in the bdev layer when an I/O fails. -1 means infinite retries. (optional)
transport_ack_timeout: Time to wait ack until packet retransmission. RDMA specific. transport_ack_timeout: Time to wait for an ack until packet retransmission for RDMA or until the connection is closed for TCP.
Range 0-31 where 0 is driver-specific default value (optional) Range 0-31 where 0 is driver-specific default value (optional)
ctrlr_loss_timeout_sec: Time to wait until ctrlr is reconnected before deleting ctrlr. ctrlr_loss_timeout_sec: Time to wait until ctrlr is reconnected before deleting ctrlr.
-1 means infinite reconnect retries. 0 means no reconnect retry. -1 means infinite reconnect retries. 0 means no reconnect retry.

View File

@ -545,7 +545,7 @@ if __name__ == "__main__":
p.add_argument('-r', '--bdev-retry-count', p.add_argument('-r', '--bdev-retry-count',
help='the number of attempts per I/O in the bdev layer when an I/O fails. -1 means infinite retries.', type=int) help='the number of attempts per I/O in the bdev layer when an I/O fails. -1 means infinite retries.', type=int)
p.add_argument('-e', '--transport-ack-timeout', p.add_argument('-e', '--transport-ack-timeout',
help="""Time to wait ack until packet retransmission. RDMA specific. help="""Time to wait ack until packet retransmission for RDMA or until closes connection for TCP.
Range 0-31 where 0 is driver-specific default value.""", type=int) Range 0-31 where 0 is driver-specific default value.""", type=int)
p.add_argument('-l', '--ctrlr-loss-timeout-sec', p.add_argument('-l', '--ctrlr-loss-timeout-sec',
help="""Time to wait until ctrlr is reconnected before deleting ctrlr. help="""Time to wait until ctrlr is reconnected before deleting ctrlr.