/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation. All rights reserved.
 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 */

/** \file
 * TCP network implementation abstraction layer
 */

#ifndef SPDK_INTERNAL_SOCK_H
#define SPDK_INTERNAL_SOCK_H

#include "spdk/stdinc.h"
#include "spdk/sock.h"
#include "spdk/queue.h"
#include "spdk/likely.h"

#ifdef __cplusplus
extern "C" {
#endif

#define MAX_EVENTS_PER_POLL 32
#define DEFAULT_SOCK_PRIORITY 0
#define MIN_SOCK_PIPE_SIZE 1024
#define MIN_SO_RCVBUF_SIZE (2 * 1024 * 1024)
#define MIN_SO_SNDBUF_SIZE (2 * 1024 * 1024)
#define IOV_BATCH_SIZE 64

struct spdk_sock {
	struct spdk_net_impl		*net_impl;
	struct spdk_sock_opts		opts;
	struct spdk_sock_group_impl	*group_impl;
	TAILQ_ENTRY(spdk_sock)		link;

	TAILQ_HEAD(, spdk_sock_request)	queued_reqs;
	TAILQ_HEAD(, spdk_sock_request)	pending_reqs;
	struct spdk_sock_request	*read_req;
	int				queued_iovcnt;
	int				cb_cnt;
	spdk_sock_cb			cb_fn;
	void				*cb_arg;
	struct {
		uint8_t		closed		: 1;
		uint8_t		reserved	: 7;
	} flags;
	struct spdk_sock_impl_opts	impl_opts;
};

struct spdk_sock_group {
	STAILQ_HEAD(, spdk_sock_group_impl)	group_impls;
	void					*ctx;
};

struct spdk_sock_group_impl {
	struct spdk_net_impl			*net_impl;
	struct spdk_sock_group			*group;
	TAILQ_HEAD(, spdk_sock)			socks;
	STAILQ_ENTRY(spdk_sock_group_impl)	link;
};

struct spdk_sock_map {
	STAILQ_HEAD(, spdk_sock_placement_id_entry) entries;
	pthread_mutex_t mtx;
};

struct spdk_net_impl {
	const char *name;
	int priority;

	int (*getaddr)(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport, char *caddr,
		       int clen, uint16_t *cport);
	struct spdk_sock *(*connect)(const char *ip, int port, struct spdk_sock_opts *opts);
	struct spdk_sock *(*listen)(const char *ip, int port, struct spdk_sock_opts *opts);
	struct spdk_sock *(*accept)(struct spdk_sock *sock);
	int (*close)(struct spdk_sock *sock);
	ssize_t (*recv)(struct spdk_sock *sock, void *buf, size_t len);
	ssize_t (*readv)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);
	ssize_t (*writev)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);

	void (*writev_async)(struct spdk_sock *sock, struct spdk_sock_request *req);
	void (*readv_async)(struct spdk_sock *sock, struct spdk_sock_request *req);
	int (*flush)(struct spdk_sock *sock);

	int (*set_recvlowat)(struct spdk_sock *sock, int nbytes);
	int (*set_recvbuf)(struct spdk_sock *sock, int sz);
	int (*set_sendbuf)(struct spdk_sock *sock, int sz);

	bool (*is_ipv6)(struct spdk_sock *sock);
	bool (*is_ipv4)(struct spdk_sock *sock);
	bool (*is_connected)(struct spdk_sock *sock);

	struct spdk_sock_group_impl *(*group_impl_get_optimal)(struct spdk_sock *sock,
			struct spdk_sock_group_impl *hint);
	struct spdk_sock_group_impl *(*group_impl_create)(void);
	int (*group_impl_add_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
	int (*group_impl_remove_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
	int (*group_impl_poll)(struct spdk_sock_group_impl *group, int max_events,
			       struct spdk_sock **socks);
	int (*group_impl_close)(struct spdk_sock_group_impl *group);

	int (*get_opts)(struct spdk_sock_impl_opts *opts, size_t *len);
	int (*set_opts)(const struct spdk_sock_impl_opts *opts, size_t len);

	STAILQ_ENTRY(spdk_net_impl) link;
};

void spdk_net_impl_register(struct spdk_net_impl *impl, int priority);

#define SPDK_NET_IMPL_REGISTER(name, impl, priority) \
static void __attribute__((constructor)) net_impl_register_##name(void) \
{ \
	spdk_net_impl_register(impl, priority); \
}

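/*
 * Illustrative sketch only, not part of this header: a transport module
 * typically fills out a static struct spdk_net_impl and registers it at load
 * time through SPDK_NET_IMPL_REGISTER. The "mytrans" name and the callback
 * functions below are hypothetical placeholders.
 *
 *	static struct spdk_net_impl g_mytrans_net_impl = {
 *		.name			= "mytrans",
 *		.connect		= mytrans_connect,
 *		.listen			= mytrans_listen,
 *		.accept			= mytrans_accept,
 *		.close			= mytrans_close,
 *		.writev_async		= mytrans_writev_async,
 *		.group_impl_create	= mytrans_group_impl_create,
 *		.group_impl_poll	= mytrans_group_impl_poll,
 *	};
 *
 *	SPDK_NET_IMPL_REGISTER(mytrans, &g_mytrans_net_impl, DEFAULT_SOCK_PRIORITY);
 */
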
static inline void
spdk_sock_request_queue(struct spdk_sock *sock, struct spdk_sock_request *req)
{
	assert(req->internal.curr_list == NULL);
	TAILQ_INSERT_TAIL(&sock->queued_reqs, req, internal.link);
#ifdef DEBUG
	req->internal.curr_list = &sock->queued_reqs;
#endif
	sock->queued_iovcnt += req->iovcnt;
}

static inline void
spdk_sock_request_pend(struct spdk_sock *sock, struct spdk_sock_request *req)
{
	assert(req->internal.curr_list == &sock->queued_reqs);
	TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
	assert(sock->queued_iovcnt >= req->iovcnt);
	sock->queued_iovcnt -= req->iovcnt;
	TAILQ_INSERT_TAIL(&sock->pending_reqs, req, internal.link);
#ifdef DEBUG
	req->internal.curr_list = &sock->pending_reqs;
#endif
}

static inline int
spdk_sock_request_complete(struct spdk_sock *sock, struct spdk_sock_request *req, int err)
{
	bool closed;
	int rc = 0;

	req->internal.offset = 0;
	req->internal.is_zcopy = 0;

	closed = sock->flags.closed;
	sock->cb_cnt++;
	req->cb_fn(req->cb_arg, err);
	assert(sock->cb_cnt > 0);
	sock->cb_cnt--;

	if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
		/* The user closed the socket in response to a callback above. */
		rc = -1;
		spdk_sock_close(&sock);
	}

	return rc;
}

static inline int
spdk_sock_request_put(struct spdk_sock *sock, struct spdk_sock_request *req, int err)
{
	assert(req->internal.curr_list == &sock->pending_reqs);
	TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
#ifdef DEBUG
	req->internal.curr_list = NULL;
#endif
	return spdk_sock_request_complete(sock, req, err);
}

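/*
 * Illustrative sketch of the request lifecycle (hypothetical transport code,
 * not part of this header): a request sits on queued_reqs until the transport
 * starts transmitting it, moves to pending_reqs while bytes are in flight,
 * and is completed once fully sent.
 *
 *	spdk_sock_request_queue(sock, req);	// user submitted an async write
 *
 *	spdk_sock_request_pend(sock, req);	// transport began sending it
 *
 *	spdk_sock_request_put(sock, req, 0);	// fully sent; invokes req->cb_fn
 */
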
static inline int
spdk_sock_abort_requests(struct spdk_sock *sock)
{
	struct spdk_sock_request *req;
	bool closed;
	int rc = 0;

	closed = sock->flags.closed;
	sock->cb_cnt++;

	req = TAILQ_FIRST(&sock->pending_reqs);
	while (req) {
		assert(req->internal.curr_list == &sock->pending_reqs);
		TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
#ifdef DEBUG
		req->internal.curr_list = NULL;
#endif

		req->cb_fn(req->cb_arg, -ECANCELED);

		req = TAILQ_FIRST(&sock->pending_reqs);
	}

	req = TAILQ_FIRST(&sock->queued_reqs);
	while (req) {
		assert(req->internal.curr_list == &sock->queued_reqs);
		TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
#ifdef DEBUG
		req->internal.curr_list = NULL;
#endif

		assert(sock->queued_iovcnt >= req->iovcnt);
		sock->queued_iovcnt -= req->iovcnt;

		req->cb_fn(req->cb_arg, -ECANCELED);

		req = TAILQ_FIRST(&sock->queued_reqs);
	}

	req = sock->read_req;
	if (req != NULL) {
		sock->read_req = NULL;
		req->cb_fn(req->cb_arg, -ECANCELED);
	}

	assert(sock->cb_cnt > 0);
	sock->cb_cnt--;

	assert(TAILQ_EMPTY(&sock->queued_reqs));
	assert(TAILQ_EMPTY(&sock->pending_reqs));

	if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
		/* The user closed the socket in response to a callback above. */
		rc = -1;
		spdk_sock_close(&sock);
	}

	return rc;
}

static inline int
spdk_sock_prep_req(struct spdk_sock_request *req, struct iovec *iovs, int index,
		   uint64_t *num_bytes)
{
	unsigned int offset;
	int iovcnt, i;

	assert(index < IOV_BATCH_SIZE);
	offset = req->internal.offset;
	iovcnt = index;

	for (i = 0; i < req->iovcnt; i++) {
		/* Consume any offset first */
		if (offset >= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len) {
			offset -= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len;
			continue;
		}

		iovs[iovcnt].iov_base = SPDK_SOCK_REQUEST_IOV(req, i)->iov_base + offset;
		iovs[iovcnt].iov_len = SPDK_SOCK_REQUEST_IOV(req, i)->iov_len - offset;
		if (num_bytes != NULL) {
			*num_bytes += iovs[iovcnt].iov_len;
		}

		iovcnt++;
		offset = 0;

		if (iovcnt >= IOV_BATCH_SIZE) {
			break;
		}
	}

	return iovcnt;
}

static inline int
spdk_sock_prep_reqs(struct spdk_sock *_sock, struct iovec *iovs, int index,
		    struct spdk_sock_request **last_req, int *flags)
{
	int iovcnt;
	struct spdk_sock_request *req;
	uint64_t total = 0;

	/* Gather an iov */
	iovcnt = index;
	if (spdk_unlikely(iovcnt >= IOV_BATCH_SIZE)) {
		goto end;
	}

	if (last_req != NULL && *last_req != NULL) {
		req = TAILQ_NEXT(*last_req, internal.link);
	} else {
		req = TAILQ_FIRST(&_sock->queued_reqs);
	}

	while (req) {
		iovcnt = spdk_sock_prep_req(req, iovs, iovcnt, &total);
		if (iovcnt >= IOV_BATCH_SIZE) {
			break;
		}

		if (last_req != NULL) {
			*last_req = req;
		}
		req = TAILQ_NEXT(req, internal.link);
	}

end:

#if defined(MSG_ZEROCOPY)
	/* if data size < zerocopy_threshold, remove MSG_ZEROCOPY flag */
	if (total < _sock->impl_opts.zerocopy_threshold && flags != NULL) {
		*flags = *flags & (~MSG_ZEROCOPY);
	}
#endif

	return iovcnt;
}

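/*
 * Illustrative sketch (hypothetical, loosely modeled on a posix-style
 * transport's flush path, not part of this header): gather queued request
 * buffers into a batch of iovecs with spdk_sock_prep_reqs() and hand them to
 * sendmsg(). The fd and sock variables are assumed to exist in the caller.
 *
 *	struct iovec iovs[IOV_BATCH_SIZE];
 *	struct spdk_sock_request *last_req = NULL;
 *	int flags = MSG_ZEROCOPY;	// may be cleared below zerocopy_threshold
 *	int iovcnt;
 *	ssize_t rc;
 *
 *	iovcnt = spdk_sock_prep_reqs(sock, iovs, 0, &last_req, &flags);
 *	if (iovcnt > 0) {
 *		struct msghdr msg = { .msg_iov = iovs, .msg_iovlen = iovcnt };
 *
 *		rc = sendmsg(fd, &msg, flags);
 *		// On success, advance req->internal.offset by rc and move
 *		// fully-sent requests along with spdk_sock_request_pend()/put().
 *	}
 */
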
static inline void
spdk_sock_get_placement_id(int fd, enum spdk_placement_mode mode, int *placement_id)
{
	*placement_id = -1;

	switch (mode) {
	case PLACEMENT_NONE:
		break;
	case PLACEMENT_MARK:
	case PLACEMENT_NAPI: {
#if defined(SO_INCOMING_NAPI_ID)
		socklen_t len = sizeof(int);

		getsockopt(fd, SOL_SOCKET, SO_INCOMING_NAPI_ID, placement_id, &len);
#endif
		break;
	}
	case PLACEMENT_CPU: {
#if defined(SO_INCOMING_CPU)
		socklen_t len = sizeof(int);

		getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, placement_id, &len);
#endif
		break;
	}
	default:
		break;
	}
}

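/*
 * Illustrative usage sketch (hypothetical, not part of this header): a
 * transport can query the kernel-provided placement hint for an accepted
 * socket's fd and use it to steer the socket toward a poll group.
 *
 *	int placement_id;
 *
 *	spdk_sock_get_placement_id(fd, PLACEMENT_NAPI, &placement_id);
 *	if (placement_id != -1) {
 *		// consult a spdk_sock_map to find the owning group
 *	}
 */
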
/**
 * Insert a group into the placement map.
 * If the group is already in the map, take a reference.
 */
int spdk_sock_map_insert(struct spdk_sock_map *map, int placement_id,
			 struct spdk_sock_group_impl *group_impl);

/**
 * Release a reference for the given placement_id. If the reference count goes to 0, the
 * entry will no longer be associated with a group.
 */
void spdk_sock_map_release(struct spdk_sock_map *map, int placement_id);

/**
 * Look up the group for the given placement_id.
 */
int spdk_sock_map_lookup(struct spdk_sock_map *map, int placement_id,
			 struct spdk_sock_group_impl **group_impl, struct spdk_sock_group_impl *hint);

/**
 * Find a placement id with no associated group
 */
int spdk_sock_map_find_free(struct spdk_sock_map *map);

/**
 * Clean up all memory associated with the given map
 */
void spdk_sock_map_cleanup(struct spdk_sock_map *map);

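/*
 * Illustrative usage sketch (hypothetical, not part of this header): tying a
 * placement id to a poll group with the map API above. The map, placement_id,
 * and my_group_impl variables are assumed to exist in the caller.
 *
 *	struct spdk_sock_group_impl *group_impl = NULL;
 *
 *	spdk_sock_map_lookup(&map, placement_id, &group_impl, NULL);
 *	if (group_impl == NULL) {
 *		spdk_sock_map_insert(&map, placement_id, my_group_impl);
 *	}
 *
 *	spdk_sock_map_release(&map, placement_id);	// drop the reference
 *	spdk_sock_map_cleanup(&map);			// free all entries
 */
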
#ifdef __cplusplus
}
#endif

#endif /* SPDK_INTERNAL_SOCK_H */