nvme_rdma: Support TOS for RDMA initiator

The spdk_nvme_ctrlr_opts structure now supports a transport_tos option
that allows setting the 'type of service' value in the IPv4 header.

This is needed to support lossless RoCE setups.

Note: Only RDMA is supported at this point.

Change-Id: I21825fc197c60f539a7d2d651a970ea380d8b56d
Signed-off-by: Michael Haeuptle <michael.haeuptle@hpe.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15908
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Michael Haeuptle 2022-12-13 20:56:06 +00:00 committed by Jim Harris
parent ce92d919d7
commit 7706450f2a
6 changed files with 37 additions and 1 deletions

View File

@ -110,6 +110,11 @@ tags will appear in SPDK's log at the warn level. As the SPDK application exits,
will log a summary of how many times `SPDK_LOG_DEPRECATED()` was called for each
tag that was logged at least once.
### nvme
Added `transport_tos` to `spdk_nvme_ctrlr_opts` to support setting the "type of service"
value in the IPv4 header. Only RDMA is supported at this time.
## v22.09
### accel

1
CONFIG
View File

@ -92,6 +92,7 @@ CONFIG_FIO_SOURCE_DIR=/usr/src/fio
CONFIG_RDMA=n CONFIG_RDMA=n
CONFIG_RDMA_SEND_WITH_INVAL=n CONFIG_RDMA_SEND_WITH_INVAL=n
CONFIG_RDMA_SET_ACK_TIMEOUT=n CONFIG_RDMA_SET_ACK_TIMEOUT=n
CONFIG_RDMA_SET_TOS=n
CONFIG_RDMA_PROV=verbs CONFIG_RDMA_PROV=verbs
# Enable NVMe Character Devices. # Enable NVMe Character Devices.

9
configure vendored
View File

@ -846,6 +846,15 @@ than or equal to 4.14 will see significantly reduced performance.
echo "RDMA_OPTION_ID_ACK_TIMEOUT is not supported" echo "RDMA_OPTION_ID_ACK_TIMEOUT is not supported"
fi fi
# Feature probe for RDMA_OPTION_ID_TOS: pipe a tiny C program that includes
# <rdma/rdma_cma.h> and references RDMA_OPTION_ID_TOS into the build command.
# The program is passed on stdin ("-") with "-c", and any diagnostics on
# stderr are discarded, so only the exit status of the build command matters.
# NOTE(review): BUILD_CMD is defined outside this hunk; presumably it is the
# C compiler invocation used by the other configure probes -- confirm.
if echo -e '#include <rdma/rdma_cma.h>\n' \
'int main(void) { return !!RDMA_OPTION_ID_TOS; }\n' \
| "${BUILD_CMD[@]}" -c - 2> /dev/null; then
# The probe built successfully: record that the TOS option is available.
CONFIG[RDMA_SET_TOS]="y"
else
# The probe failed to build: record the option as unavailable and tell the
# user, without aborting configure.
CONFIG[RDMA_SET_TOS]="n"
echo "RDMA_OPTION_ID_TOS is not supported"
fi
if [ "${CONFIG[RDMA_PROV]}" == "mlx5_dv" ]; then if [ "${CONFIG[RDMA_PROV]}" == "mlx5_dv" ]; then
if ! echo -e '#include <spdk/stdinc.h>\n' \ if ! echo -e '#include <spdk/stdinc.h>\n' \
'#include <infiniband/mlx5dv.h>\n' \ '#include <infiniband/mlx5dv.h>\n' \

View File

@ -275,8 +275,15 @@ struct spdk_nvme_ctrlr_opts {
* 12B (header) + 2B (hash) + 176B (base64 for 1024b + crc32) + 3B (colons) + 1B (NULL) + 6B (extra space for future) * 12B (header) + 2B (hash) + 176B (base64 for 1024b + crc32) + 3B (colons) + 1B (NULL) + 6B (extra space for future)
*/ */
char psk[200]; char psk[200];
/**
* It is used for RDMA transport.
*
* Sets the IP type of service (TOS) value for the RDMA transport. The default is 0, which means that the TOS will not be set.
*/
uint8_t transport_tos;
} __attribute__((packed)); } __attribute__((packed));
SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ctrlr_opts) == 817, "Incorrect size"); SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_ctrlr_opts) == 818, "Incorrect size");
/** /**
* NVMe acceleration operation callback. * NVMe acceleration operation callback.

View File

@ -171,6 +171,8 @@ extern pid_t g_spdk_nvme_pid;
#define SPDK_NVME_TRANSPORT_ACK_TIMEOUT_DISABLED (0) #define SPDK_NVME_TRANSPORT_ACK_TIMEOUT_DISABLED (0)
#define SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT SPDK_NVME_TRANSPORT_ACK_TIMEOUT_DISABLED #define SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT SPDK_NVME_TRANSPORT_ACK_TIMEOUT_DISABLED
#define SPDK_NVME_TRANSPORT_TOS_DISABLED (0)
#define MIN_KEEP_ALIVE_TIMEOUT_IN_MS (10000) #define MIN_KEEP_ALIVE_TIMEOUT_IN_MS (10000)
/* We want to fit submission and completion rings each in a single 2MB /* We want to fit submission and completion rings each in a single 2MB

View File

@ -1023,6 +1023,18 @@ nvme_rdma_addr_resolved(struct nvme_rdma_qpair *rqpair, int ret)
#endif #endif
} }
if (rqpair->qpair.ctrlr->opts.transport_tos != SPDK_NVME_TRANSPORT_TOS_DISABLED) {
#ifdef SPDK_CONFIG_RDMA_SET_TOS
uint8_t tos = rqpair->qpair.ctrlr->opts.transport_tos;
ret = rdma_set_option(rqpair->cm_id, RDMA_OPTION_ID, RDMA_OPTION_ID_TOS, &tos, sizeof(tos));
if (ret) {
SPDK_NOTICELOG("Can't apply RDMA_OPTION_ID_TOS %u, ret %d\n", tos, ret);
}
#else
SPDK_DEBUGLOG(nvme, "transport_tos is not supported\n");
#endif
}
ret = rdma_resolve_route(rqpair->cm_id, NVME_RDMA_TIME_OUT_IN_MS); ret = rdma_resolve_route(rqpair->cm_id, NVME_RDMA_TIME_OUT_IN_MS);
if (ret) { if (ret) {
SPDK_ERRLOG("rdma_resolve_route\n"); SPDK_ERRLOG("rdma_resolve_route\n");