From 94966468ae249c769795a8e40ffd94d2bc3bda62 Mon Sep 17 00:00:00 2001 From: Alexey Marchuk Date: Tue, 21 Jan 2020 17:03:40 +0300 Subject: [PATCH] nvme/rdma: Introduce transport_ack_timeout Add transport_ack_timeout parameter to nvme controller opts. This parameter allows configuring the RDMA ACK timeout according to the formula 4.096 * 2^(transport_ack_timeout) usec. The parameter should be in the range 0..31, where 0 means use the driver-specific default value. Change-Id: I0c8a5a636aa9d816bda5c1ba58f56a00a585b060 Signed-off-by: Alexey Marchuk Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/502 Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Shuhei Matsumoto --- CHANGELOG.md | 3 +++ CONFIG | 1 + configure | 10 ++++++++++ include/spdk/nvme.h | 16 +++++++++++++++- lib/nvme/nvme_ctrlr.c | 4 ++++ lib/nvme/nvme_internal.h | 3 +++ lib/nvme/nvme_rdma.c | 26 ++++++++++++++++++++++++++ 7 files changed, 62 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0bd3516e..09a445906 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -156,6 +156,9 @@ the target expects the initiator to send both the compare command and write comm SPDK initiator currently respects this requirement, but this note is included as a flag for other initiators attempting compatibility with this version of SPDK. +The `spdk_nvme_ctrlr_opts` struct has been extended with a new field, `transport_ack_timeout`, which allows +configuring the transport ACK timeout. Applicable to the RDMA transport only. + ### rpc A new RPC, `bdev_zone_block_create`, enables creating an emulated zoned bdev on top of a standard block device. diff --git a/CONFIG b/CONFIG index cc450e8e7..e77332bfd 100644 --- a/CONFIG +++ b/CONFIG @@ -93,6 +93,7 @@ CONFIG_FIO_SOURCE_DIR=/usr/src/fio # Requires ibverbs development libraries. CONFIG_RDMA=n CONFIG_RDMA_SEND_WITH_INVAL=n +CONFIG_RDMA_SET_ACK_TIMEOUT=n # Enable NVMe Character Devices. 
CONFIG_NVME_CUSE=n diff --git a/configure b/configure index e8b7933ad..095fb7924 100755 --- a/configure +++ b/configure @@ -569,6 +569,16 @@ of libibverbs, so Linux kernel NVMe-oF initiators based on kernels greater than or equal to 4.14 will see significantly reduced performance. *******************************************************************************" fi + + if echo -e '#include <rdma/rdma_cma.h>\n' \ + 'int main(void) { return !!RDMA_OPTION_ID_ACK_TIMEOUT; }\n' \ + | ${BUILD_CMD[@]} -c - 2>/dev/null; then + CONFIG[RDMA_SET_ACK_TIMEOUT]="y" + else + CONFIG[RDMA_SET_ACK_TIMEOUT]="n" + echo "RDMA_OPTION_ID_ACK_TIMEOUT is not supported" + fi + fi if [[ "${CONFIG[FC]}" = "y" ]]; then diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h index e6318626b..4d2267c48 100644 --- a/include/spdk/nvme.h +++ b/include/spdk/nvme.h @@ -2,7 +2,7 @@ * BSD LICENSE * * Copyright (c) Intel Corporation. All rights reserved. - * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -220,6 +220,20 @@ struct spdk_nvme_ctrlr_opts { * Defaults to 'false' (errors are logged). */ bool disable_error_logging; + + /** + * It is used for RDMA transport + * Specify the transport ACK timeout. The value should be in range 0-31 where 0 means + * use driver-specific default value. The value is applied to each RDMA qpair + * and affects the time that qpair waits for transport layer acknowledgement + * until it retransmits a packet. The value should be chosen empirically + * to meet the needs of a particular application. A low value means less time + * the qpair waits for ACK which can increase the number of retransmissions. + * A large value can increase the time the connection is closed. 
+ * The value of ACK timeout is calculated according to the formula + * 4.096 * 2^(transport_ack_timeout) usec. + */ + uint8_t transport_ack_timeout; }; /** diff --git a/lib/nvme/nvme_ctrlr.c b/lib/nvme/nvme_ctrlr.c index d4c2b9956..2f342d050 100644 --- a/lib/nvme/nvme_ctrlr.c +++ b/lib/nvme/nvme_ctrlr.c @@ -174,6 +174,10 @@ spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t if (FIELD_OK(disable_error_logging)) { opts->disable_error_logging = false; } + + if (FIELD_OK(transport_ack_timeout)) { + opts->transport_ack_timeout = SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT; + } #undef FIELD_OK } diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h index 80e2ed646..bb46c89d8 100644 --- a/lib/nvme/nvme_internal.h +++ b/lib/nvme/nvme_internal.h @@ -159,6 +159,9 @@ extern pid_t g_spdk_nvme_pid; #define SPDK_NVME_DEFAULT_RETRY_COUNT (4) +#define SPDK_NVME_TRANSPORT_ACK_TIMEOUT_DISABLED (0) +#define SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT SPDK_NVME_TRANSPORT_ACK_TIMEOUT_DISABLED + #define MIN_KEEP_ALIVE_TIMEOUT_IN_MS (10000) /* We want to fit submission and completion rings each in a single 2MB diff --git a/lib/nvme/nvme_rdma.c b/lib/nvme/nvme_rdma.c index 3d570dd41..95d6b3836 100644 --- a/lib/nvme/nvme_rdma.c +++ b/lib/nvme/nvme_rdma.c @@ -51,6 +51,7 @@ #include "spdk/string.h" #include "spdk/endian.h" #include "spdk/likely.h" +#include "spdk/config.h" #include "nvme_internal.h" @@ -84,6 +85,11 @@ */ #define NVME_RDMA_CTRLR_MAX_TRANSPORT_RETRY_COUNT 7 +/* + * Maximum value of transport_ack_timeout used by RDMA controller + */ +#define NVME_RDMA_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT 31 + struct spdk_nvmf_cmd { struct spdk_nvme_cmd cmd; struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS]; @@ -859,6 +865,21 @@ nvme_rdma_resolve_addr(struct nvme_rdma_qpair *rqpair, return -1; } + if (rqpair->qpair.ctrlr->opts.transport_ack_timeout != SPDK_NVME_TRANSPORT_ACK_TIMEOUT_DISABLED) { +#ifdef SPDK_CONFIG_RDMA_SET_ACK_TIMEOUT + uint8_t 
timeout = rqpair->qpair.ctrlr->opts.transport_ack_timeout; + ret = rdma_set_option(rqpair->cm_id, RDMA_OPTION_ID, + RDMA_OPTION_ID_ACK_TIMEOUT, + &timeout, sizeof(timeout)); + if (ret) { + SPDK_NOTICELOG("Can't apply RDMA_OPTION_ID_ACK_TIMEOUT %d, ret %d\n", timeout, ret); + } +#else + SPDK_DEBUGLOG(SPDK_LOG_NVME, "transport_ack_timeout is not supported\n"); +#endif + } + + ret = rdma_resolve_route(rqpair->cm_id, NVME_RDMA_TIME_OUT_IN_MS); if (ret) { SPDK_ERRLOG("rdma_resolve_route\n"); @@ -1732,6 +1753,11 @@ static struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_ NVME_RDMA_CTRLR_MAX_TRANSPORT_RETRY_COUNT); rctrlr->ctrlr.opts.transport_retry_count = NVME_RDMA_CTRLR_MAX_TRANSPORT_RETRY_COUNT; } + if (rctrlr->ctrlr.opts.transport_ack_timeout > NVME_RDMA_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) { + SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n", + NVME_RDMA_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT); + rctrlr->ctrlr.opts.transport_ack_timeout = NVME_RDMA_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT; + } contexts = rdma_get_devices(NULL); if (contexts == NULL) {