nvmf/tcp: Use the success optimization by default
By now (5.1 is released), the Linux kernel initiator supports the success optimization; furthermore, the version that doesn't support it (5.0) has been EOL-ed. As such, let's enable it in SPDK by default. Doing so provides a notable performance improvement: running perf with an iodepth of 64, randread, two threads, and a block size of 512 bytes for 60s ("-q 64 -w randread -o 512 -c 0x5000 -t 60") over the VMA socket acceleration library and a null backing store, we got 730K IOPS with the success optimization vs 550K without it. IOPS MiB/s Average min max 549274.10 268.20 232.99 93.23 3256354.96 728117.57 355.53 175.76 85.93 14632.16 To allow for interop with older kernel initiators, we added a config knob under which the success optimization can be enabled or disabled. Change-Id: Ia4c79f607f82c3563523ae3e07a67eac95b56dbb Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/457644 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Reviewed-by: Ziye Yang <ziye.yang@intel.com> Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
This commit is contained in:
parent
2224554eec
commit
6629202cbd
@ -8,6 +8,9 @@ A new file API `spdk_posix_file_load` was added to load file content into a data
|
||||
|
||||
### NVMe-oF Target
|
||||
|
||||
The c2h success optimization under which a command capsule response is not sent
|
||||
for reads is now turned on by default. A config knob was added to allow enabling or disabling it.
|
||||
|
||||
Shared receive queue can now be disabled even for NICs that support it using the
|
||||
`nvmf_create_transport` RPC method parameter `no_srq`. The actual use of a shared
|
||||
receive queue is predicated on hardware support when this flag is not used.
|
||||
|
@ -134,6 +134,9 @@
|
||||
# Set the number of shared buffers to be cached per poll group
|
||||
#BufCacheSize 32
|
||||
|
||||
# Set whether to use the C2H Success optimization, only used for TCP transport.
|
||||
# C2HSuccess true
|
||||
|
||||
[Nvme]
|
||||
# NVMe Device Whitelist
|
||||
# Users may specify which NVMe devices to claim by their transport id.
|
||||
|
@ -74,6 +74,7 @@ struct spdk_nvmf_transport_opts {
|
||||
uint32_t buf_cache_size;
|
||||
uint32_t max_srq_depth;
|
||||
bool no_srq;
|
||||
bool c2h_success;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -483,6 +483,7 @@ spdk_nvmf_parse_transport(struct spdk_nvmf_parse_transport_ctx *ctx)
|
||||
struct spdk_nvmf_transport_opts opts = { 0 };
|
||||
enum spdk_nvme_transport_type trtype;
|
||||
struct spdk_nvmf_transport *transport;
|
||||
bool bval;
|
||||
int val;
|
||||
|
||||
type = spdk_conf_section_get_val(ctx->sp, "Type");
|
||||
@ -552,20 +553,31 @@ spdk_nvmf_parse_transport(struct spdk_nvmf_parse_transport_ctx *ctx)
|
||||
opts.max_srq_depth = val;
|
||||
} else {
|
||||
SPDK_ERRLOG("MaxSRQDepth is relevant only for RDMA transport '%s'\n", type);
|
||||
ctx->cb_fn(-1);
|
||||
free(ctx);
|
||||
return;
|
||||
goto error_out;
|
||||
}
|
||||
}
|
||||
|
||||
bval = spdk_conf_section_get_boolval(ctx->sp, "C2HSuccess", true);
|
||||
if (trtype == SPDK_NVME_TRANSPORT_TCP) {
|
||||
opts.c2h_success = bval;
|
||||
} else {
|
||||
SPDK_ERRLOG("C2HSuccess is relevant only for TCP transport '%s'\n", type);
|
||||
goto error_out;
|
||||
}
|
||||
|
||||
transport = spdk_nvmf_transport_create(trtype, &opts);
|
||||
if (transport) {
|
||||
spdk_nvmf_tgt_add_transport(g_spdk_nvmf_tgt, transport, spdk_nvmf_tgt_add_transport_done, ctx);
|
||||
} else {
|
||||
ctx->cb_fn(-1);
|
||||
free(ctx);
|
||||
return;
|
||||
goto error_out;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
error_out:
|
||||
ctx->cb_fn(-1);
|
||||
free(ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -1458,6 +1458,10 @@ static const struct spdk_json_object_decoder nvmf_rpc_create_transport_decoder[]
|
||||
"no_srq", offsetof(struct nvmf_rpc_create_transport_ctx, opts.no_srq),
|
||||
spdk_json_decode_bool, true
|
||||
},
|
||||
{
|
||||
"c2h_success", offsetof(struct nvmf_rpc_create_transport_ctx, opts.c2h_success),
|
||||
spdk_json_decode_bool, true
|
||||
},
|
||||
};
|
||||
|
||||
static void
|
||||
@ -1594,6 +1598,8 @@ dump_nvmf_transport(struct spdk_json_write_ctx *w, struct spdk_nvmf_transport *t
|
||||
if (type == SPDK_NVME_TRANSPORT_RDMA) {
|
||||
spdk_json_write_named_uint32(w, "max_srq_depth", opts->max_srq_depth);
|
||||
spdk_json_write_named_bool(w, "no_srq", opts->no_srq);
|
||||
} else if (type == SPDK_NVME_TRANSPORT_TCP) {
|
||||
spdk_json_write_named_bool(w, "c2h_success", opts->c2h_success);
|
||||
}
|
||||
|
||||
spdk_json_write_object_end(w);
|
||||
|
@ -55,9 +55,6 @@
|
||||
#define NVMF_TCP_PDU_MAX_C2H_DATA_SIZE 131072
|
||||
#define NVMF_TCP_QPAIR_MAX_C2H_PDU_NUM 64 /* Maximum c2h_data pdu number for each tqpair */
|
||||
|
||||
/* This is used to support the Linux kernel NVMe-oF initiator */
|
||||
#define LINUX_KERNEL_SUPPORT_NOT_SENDING_RESP_FOR_C2H 0
|
||||
|
||||
/* spdk nvmf related structure */
|
||||
enum spdk_nvmf_tcp_req_state {
|
||||
|
||||
@ -535,14 +532,15 @@ spdk_nvmf_tcp_create(struct spdk_nvmf_transport_opts *opts)
|
||||
" Transport opts: max_ioq_depth=%d, max_io_size=%d,\n"
|
||||
" max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
|
||||
" in_capsule_data_size=%d, max_aq_depth=%d\n"
|
||||
" num_shared_buffers=%d\n",
|
||||
" num_shared_buffers=%d, c2h_success=%d\n",
|
||||
opts->max_queue_depth,
|
||||
opts->max_io_size,
|
||||
opts->max_qpairs_per_ctrlr,
|
||||
opts->io_unit_size,
|
||||
opts->in_capsule_data_size,
|
||||
opts->max_aq_depth,
|
||||
opts->num_shared_buffers);
|
||||
opts->num_shared_buffers,
|
||||
opts->c2h_success);
|
||||
|
||||
/* I/O unit size cannot be larger than max I/O size */
|
||||
if (opts->io_unit_size > opts->max_io_size) {
|
||||
@ -1460,11 +1458,11 @@ spdk_nvmf_tcp_pdu_c2h_data_complete(void *cb_arg)
|
||||
assert(tcp_req->c2h_data_pdu_num > 0);
|
||||
tcp_req->c2h_data_pdu_num--;
|
||||
if (!tcp_req->c2h_data_pdu_num) {
|
||||
#if LINUX_KERNEL_SUPPORT_NOT_SENDING_RESP_FOR_C2H
|
||||
nvmf_tcp_request_free(tcp_req);
|
||||
#else
|
||||
spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
|
||||
#endif
|
||||
if (tqpair->qpair.transport->opts.c2h_success) {
|
||||
nvmf_tcp_request_free(tcp_req);
|
||||
} else {
|
||||
spdk_nvmf_tcp_send_capsule_resp_pdu(tcp_req, tqpair);
|
||||
}
|
||||
}
|
||||
|
||||
tqpair->c2h_data_pdu_cnt--;
|
||||
@ -2233,10 +2231,9 @@ spdk_nvmf_tcp_send_c2h_data(struct spdk_nvmf_tcp_qpair *tqpair,
|
||||
if (iov_index == (tcp_req->req.iovcnt - 1) && (tcp_req->c2h_data_offset == tcp_req->req.length)) {
|
||||
SPDK_DEBUGLOG(SPDK_LOG_NVMF_TCP, "Last pdu for tcp_req=%p on tqpair=%p\n", tcp_req, tqpair);
|
||||
c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
|
||||
/* The linux kernel does not support this yet */
|
||||
#if LINUX_KERNEL_SUPPORT_NOT_SENDING_RESP_FOR_C2H
|
||||
c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
|
||||
#endif
|
||||
if (tqpair->qpair.transport->opts.c2h_success) {
|
||||
c2h_data->common.flags |= SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
|
||||
}
|
||||
TAILQ_REMOVE(&tqpair->queued_c2h_data_tcp_req, tcp_req, link);
|
||||
}
|
||||
|
||||
@ -2748,6 +2745,7 @@ spdk_nvmf_tcp_qpair_set_sq_size(struct spdk_nvmf_qpair *qpair)
|
||||
#define SPDK_NVMF_TCP_DEFAULT_IO_UNIT_SIZE 131072
|
||||
#define SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS 511
|
||||
#define SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE 32
|
||||
#define SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION true
|
||||
|
||||
static void
|
||||
spdk_nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
|
||||
@ -2760,6 +2758,7 @@ spdk_nvmf_tcp_opts_init(struct spdk_nvmf_transport_opts *opts)
|
||||
opts->max_aq_depth = SPDK_NVMF_TCP_DEFAULT_AQ_DEPTH;
|
||||
opts->num_shared_buffers = SPDK_NVMF_TCP_DEFAULT_NUM_SHARED_BUFFERS;
|
||||
opts->buf_cache_size = SPDK_NVMF_TCP_DEFAULT_BUFFER_CACHE_SIZE;
|
||||
opts->c2h_success = SPDK_NVMF_TCP_DEFAULT_SUCCESS_OPTIMIZATION;
|
||||
}
|
||||
|
||||
const struct spdk_nvmf_transport_ops spdk_nvmf_transport_tcp = {
|
||||
|
@ -1410,7 +1410,8 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse
|
||||
num_shared_buffers=args.num_shared_buffers,
|
||||
buf_cache_size=args.buf_cache_size,
|
||||
max_srq_depth=args.max_srq_depth,
|
||||
no_srq=args.no_srq)
|
||||
no_srq=args.no_srq,
|
||||
c2h_success=args.c2h_success)
|
||||
|
||||
p = subparsers.add_parser('nvmf_create_transport', help='Create NVMf transport')
|
||||
p.add_argument('-t', '--trtype', help='Transport type (ex. RDMA)', type=str, required=True)
|
||||
@ -1424,6 +1425,7 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse
|
||||
p.add_argument('-b', '--buf-cache-size', help='The number of shared buffers to reserve for each poll group', type=int)
|
||||
p.add_argument('-s', '--max-srq-depth', help='Max number of outstanding I/O per SRQ. Relevant only for RDMA transport', type=int)
|
||||
p.add_argument('-r', '--no-srq', action='store_true', help='Disable per-thread shared receive queue. Relevant only for RDMA transport')
|
||||
p.add_argument('-o', '--c2h-success', help='Enable C2H success optimization. Relevant only for TCP transport', type=bool)
|
||||
p.set_defaults(func=nvmf_create_transport)
|
||||
|
||||
def get_nvmf_transports(args):
|
||||
|
@ -46,7 +46,8 @@ def nvmf_create_transport(client,
|
||||
num_shared_buffers=None,
|
||||
buf_cache_size=None,
|
||||
max_srq_depth=None,
|
||||
no_srq=False):
|
||||
no_srq=False,
|
||||
c2h_success=True):
|
||||
"""NVMf Transport Create options.
|
||||
|
||||
Args:
|
||||
@ -61,6 +62,7 @@ def nvmf_create_transport(client,
|
||||
buf_cache_size: The number of shared buffers to reserve for each poll group (optional)
|
||||
max_srq_depth: Max number of outstanding I/O per shared receive queue - RDMA specific (optional)
|
||||
no_srq: Boolean flag to disable SRQ even for devices that support it - RDMA specific (optional)
|
||||
c2h_success: Boolean flag to enable/disable the C2H success optimization - TCP specific (optional)
|
||||
|
||||
Returns:
|
||||
True or False
|
||||
@ -88,6 +90,8 @@ def nvmf_create_transport(client,
|
||||
params['max_srq_depth'] = max_srq_depth
|
||||
if no_srq:
|
||||
params['no_srq'] = no_srq
|
||||
if c2h_success:
|
||||
params['c2h_success'] = c2h_success
|
||||
return client.call('nvmf_create_transport', params)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user