/* SPDX-License-Identifier: BSD-3-Clause * Copyright (C) 2018 Intel Corporation. All rights reserved. * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. */ /** \file * TCP network implementation abstraction layer */ #ifndef SPDK_INTERNAL_SOCK_H #define SPDK_INTERNAL_SOCK_H #include "spdk/stdinc.h" #include "spdk/sock.h" #include "spdk/queue.h" #include "spdk/likely.h" #ifdef __cplusplus extern "C" { #endif #define MAX_EVENTS_PER_POLL 32 #define DEFAULT_SOCK_PRIORITY 0 #define MIN_SOCK_PIPE_SIZE 1024 #define MIN_SO_RCVBUF_SIZE (2 * 1024 * 1024) #define MIN_SO_SNDBUF_SIZE (2 * 1024 * 1024) #define IOV_BATCH_SIZE 64 struct spdk_sock { struct spdk_net_impl *net_impl; struct spdk_sock_opts opts; struct spdk_sock_group_impl *group_impl; TAILQ_ENTRY(spdk_sock) link; TAILQ_HEAD(, spdk_sock_request) queued_reqs; TAILQ_HEAD(, spdk_sock_request) pending_reqs; struct spdk_sock_request *read_req; int queued_iovcnt; int cb_cnt; spdk_sock_cb cb_fn; void *cb_arg; struct { uint8_t closed : 1; uint8_t reserved : 7; } flags; struct spdk_sock_impl_opts impl_opts; }; struct spdk_sock_group { STAILQ_HEAD(, spdk_sock_group_impl) group_impls; void *ctx; }; struct spdk_sock_group_impl { struct spdk_net_impl *net_impl; struct spdk_sock_group *group; TAILQ_HEAD(, spdk_sock) socks; STAILQ_ENTRY(spdk_sock_group_impl) link; }; struct spdk_sock_map { STAILQ_HEAD(, spdk_sock_placement_id_entry) entries; pthread_mutex_t mtx; }; struct spdk_net_impl { const char *name; int priority; int (*getaddr)(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport, char *caddr, int clen, uint16_t *cport); struct spdk_sock *(*connect)(const char *ip, int port, struct spdk_sock_opts *opts); struct spdk_sock *(*listen)(const char *ip, int port, struct spdk_sock_opts *opts); struct spdk_sock *(*accept)(struct spdk_sock *sock); int (*close)(struct spdk_sock *sock); ssize_t (*recv)(struct spdk_sock *sock, void *buf, size_t len); ssize_t (*readv)(struct spdk_sock *sock, struct iovec *iov, int iovcnt); ssize_t (*writev)(struct spdk_sock *sock, struct iovec *iov, int iovcnt); void (*writev_async)(struct spdk_sock *sock, struct spdk_sock_request *req); void (*readv_async)(struct spdk_sock *sock, struct spdk_sock_request *req); int (*flush)(struct spdk_sock *sock); int (*set_recvlowat)(struct spdk_sock *sock, int nbytes); int (*set_recvbuf)(struct spdk_sock *sock, int sz); int (*set_sendbuf)(struct spdk_sock *sock, int sz); bool (*is_ipv6)(struct spdk_sock *sock); bool (*is_ipv4)(struct spdk_sock *sock); bool (*is_connected)(struct spdk_sock *sock); struct spdk_sock_group_impl *(*group_impl_get_optimal)(struct spdk_sock *sock, struct spdk_sock_group_impl *hint); struct spdk_sock_group_impl *(*group_impl_create)(void); int (*group_impl_add_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock); int (*group_impl_remove_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock); int (*group_impl_poll)(struct spdk_sock_group_impl *group, int max_events, struct spdk_sock **socks); int (*group_impl_close)(struct spdk_sock_group_impl *group); int (*get_opts)(struct spdk_sock_impl_opts *opts, size_t *len); int (*set_opts)(const struct spdk_sock_impl_opts *opts, size_t len); STAILQ_ENTRY(spdk_net_impl) link; }; void spdk_net_impl_register(struct spdk_net_impl *impl, int priority); #define SPDK_NET_IMPL_REGISTER(name, impl, priority) \ static void __attribute__((constructor)) net_impl_register_##name(void) \ { \ spdk_net_impl_register(impl, priority); \ } static inline void spdk_sock_request_queue(struct spdk_sock *sock, struct spdk_sock_request *req) { assert(req->internal.curr_list == NULL); TAILQ_INSERT_TAIL(&sock->queued_reqs, req, internal.link); #ifdef DEBUG req->internal.curr_list = &sock->queued_reqs; #endif sock->queued_iovcnt += req->iovcnt; } static inline void spdk_sock_request_pend(struct spdk_sock *sock, struct spdk_sock_request *req) { assert(req->internal.curr_list == &sock->queued_reqs); TAILQ_REMOVE(&sock->queued_reqs, req, internal.link); assert(sock->queued_iovcnt >= req->iovcnt); sock->queued_iovcnt -= req->iovcnt; TAILQ_INSERT_TAIL(&sock->pending_reqs, req, internal.link); #ifdef DEBUG req->internal.curr_list = &sock->pending_reqs; #endif } static inline int spdk_sock_request_complete(struct spdk_sock *sock, struct spdk_sock_request *req, int err) { bool closed; int rc = 0; req->internal.offset = 0; req->internal.is_zcopy = 0; closed = sock->flags.closed; sock->cb_cnt++; req->cb_fn(req->cb_arg, err); assert(sock->cb_cnt > 0); sock->cb_cnt--; if (sock->cb_cnt == 0 && !closed && sock->flags.closed) { /* The user closed the socket in response to a callback above. */ rc = -1; spdk_sock_close(&sock); } return rc; } static inline int spdk_sock_request_put(struct spdk_sock *sock, struct spdk_sock_request *req, int err) { assert(req->internal.curr_list == &sock->pending_reqs); TAILQ_REMOVE(&sock->pending_reqs, req, internal.link); #ifdef DEBUG req->internal.curr_list = NULL; #endif return spdk_sock_request_complete(sock, req, err); } static inline int spdk_sock_abort_requests(struct spdk_sock *sock) { struct spdk_sock_request *req; bool closed; int rc = 0; closed = sock->flags.closed; sock->cb_cnt++; req = TAILQ_FIRST(&sock->pending_reqs); while (req) { assert(req->internal.curr_list == &sock->pending_reqs); TAILQ_REMOVE(&sock->pending_reqs, req, internal.link); #ifdef DEBUG req->internal.curr_list = NULL; #endif req->cb_fn(req->cb_arg, -ECANCELED); req = TAILQ_FIRST(&sock->pending_reqs); } req = TAILQ_FIRST(&sock->queued_reqs); while (req) { assert(req->internal.curr_list == &sock->queued_reqs); TAILQ_REMOVE(&sock->queued_reqs, req, internal.link); #ifdef DEBUG req->internal.curr_list = NULL; #endif assert(sock->queued_iovcnt >= req->iovcnt); sock->queued_iovcnt -= req->iovcnt; req->cb_fn(req->cb_arg, -ECANCELED); req = TAILQ_FIRST(&sock->queued_reqs); } req = sock->read_req; if (req != NULL) { sock->read_req = NULL; req->cb_fn(req->cb_arg, -ECANCELED); } assert(sock->cb_cnt > 0); sock->cb_cnt--; assert(TAILQ_EMPTY(&sock->queued_reqs)); assert(TAILQ_EMPTY(&sock->pending_reqs)); if (sock->cb_cnt == 0 && !closed && sock->flags.closed) { /* The user closed the socket in response to a callback above. */ rc = -1; spdk_sock_close(&sock); } return rc; } static inline int spdk_sock_prep_req(struct spdk_sock_request *req, struct iovec *iovs, int index, uint64_t *num_bytes) { unsigned int offset; int iovcnt, i; assert(index < IOV_BATCH_SIZE); offset = req->internal.offset; iovcnt = index; for (i = 0; i < req->iovcnt; i++) { /* Consume any offset first */ if (offset >= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len) { offset -= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len; continue; } iovs[iovcnt].iov_base = SPDK_SOCK_REQUEST_IOV(req, i)->iov_base + offset; iovs[iovcnt].iov_len = SPDK_SOCK_REQUEST_IOV(req, i)->iov_len - offset; if (num_bytes != NULL) { *num_bytes += iovs[iovcnt].iov_len; } iovcnt++; offset = 0; if (iovcnt >= IOV_BATCH_SIZE) { break; } } return iovcnt; } static inline int spdk_sock_prep_reqs(struct spdk_sock *_sock, struct iovec *iovs, int index, struct spdk_sock_request **last_req, int *flags) { int iovcnt; struct spdk_sock_request *req; uint64_t total = 0; /* Gather an iov */ iovcnt = index; if (spdk_unlikely(iovcnt >= IOV_BATCH_SIZE)) { goto end; } if (last_req != NULL && *last_req != NULL) { req = TAILQ_NEXT(*last_req, internal.link); } else { req = TAILQ_FIRST(&_sock->queued_reqs); } while (req) { iovcnt = spdk_sock_prep_req(req, iovs, iovcnt, &total); if (iovcnt >= IOV_BATCH_SIZE) { break; } if (last_req != NULL) { *last_req = req; } req = TAILQ_NEXT(req, internal.link); } end: #if defined(MSG_ZEROCOPY) /* if data size < zerocopy_threshold, remove MSG_ZEROCOPY flag */ if (total < _sock->impl_opts.zerocopy_threshold && flags != NULL) { *flags = *flags & (~MSG_ZEROCOPY); } #endif return iovcnt; } static inline void spdk_sock_get_placement_id(int fd, enum spdk_placement_mode mode, int *placement_id) { *placement_id = -1; switch (mode) { case PLACEMENT_NONE: break; case PLACEMENT_MARK: case PLACEMENT_NAPI: { #if defined(SO_INCOMING_NAPI_ID) socklen_t len = sizeof(int); getsockopt(fd, SOL_SOCKET, SO_INCOMING_NAPI_ID, placement_id, &len); #endif break; } case PLACEMENT_CPU: { #if defined(SO_INCOMING_CPU) socklen_t len = sizeof(int); getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, placement_id, &len); #endif break; } default: break; } } /** * Insert a group into the placement map. * If the group is already in the map, take a reference. */ int spdk_sock_map_insert(struct spdk_sock_map *map, int placement_id, struct spdk_sock_group_impl *group_impl); /** * Release a reference for the given placement_id. If the reference count goes to 0, the * entry will no longer be associated with a group. */ void spdk_sock_map_release(struct spdk_sock_map *map, int placement_id); /** * Look up the group for the given placement_id. */ int spdk_sock_map_lookup(struct spdk_sock_map *map, int placement_id, struct spdk_sock_group_impl **group_impl, struct spdk_sock_group_impl *hint); /** * Find a placement id with no associated group */ int spdk_sock_map_find_free(struct spdk_sock_map *map); /** * Clean up all memory associated with the given map */ void spdk_sock_map_cleanup(struct spdk_sock_map *map); #ifdef __cplusplus } #endif #endif /* SPDK_INTERNAL_SOCK_H */