per Intel policy to include file commit date using git cmd below. The policy does not apply to non-Intel (C) notices. git log --follow -C90% --format=%ad --date default <file> | tail -1 and then pull just the 4 digit year from the result. Intel copyrights were not added to files where Intel either had no contribution or the contribution lacked substance (i.e., license header updates, formatting changes, etc). Contribution date used "--follow -C95%" to get the most accurate date. Note that several files in this patch didn't end the license/(c) block with a blank comment line so these were added as the vast majority of files do have this last blank line. Simply there for consistency. Signed-off-by: paul luse <paul.e.luse@intel.com> Change-Id: Id5b7ce4f658fe87132f14139ead58d6e285c04d4 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15192 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Community-CI: Mellanox Build Bot
376 lines
9.5 KiB
C
376 lines
9.5 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2018 Intel Corporation. All rights reserved.
 * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 */
|
|
|
|
/** \file
 * TCP network implementation abstraction layer
 */
|
|
|
|
#ifndef SPDK_INTERNAL_SOCK_H
|
|
#define SPDK_INTERNAL_SOCK_H
|
|
|
|
#include "spdk/stdinc.h"
|
|
#include "spdk/sock.h"
|
|
#include "spdk/queue.h"
|
|
#include "spdk/likely.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/*
 * Constants for the socket layer. Only IOV_BATCH_SIZE is referenced by the
 * inline helpers in this header; the others are consumed elsewhere.
 */
#define MAX_EVENTS_PER_POLL 32
#define DEFAULT_SOCK_PRIORITY 0
#define MIN_SOCK_PIPE_SIZE 1024
#define MIN_SO_RCVBUF_SIZE (2 * 1024 * 1024)
#define MIN_SO_SNDBUF_SIZE (2 * 1024 * 1024)
/* Upper bound on the iovec entries gathered by spdk_sock_prep_req()/spdk_sock_prep_reqs(). */
#define IOV_BATCH_SIZE 64
|
|
|
|
struct spdk_sock {
|
|
struct spdk_net_impl *net_impl;
|
|
struct spdk_sock_opts opts;
|
|
struct spdk_sock_group_impl *group_impl;
|
|
TAILQ_ENTRY(spdk_sock) link;
|
|
|
|
TAILQ_HEAD(, spdk_sock_request) queued_reqs;
|
|
TAILQ_HEAD(, spdk_sock_request) pending_reqs;
|
|
struct spdk_sock_request *read_req;
|
|
int queued_iovcnt;
|
|
int cb_cnt;
|
|
spdk_sock_cb cb_fn;
|
|
void *cb_arg;
|
|
struct {
|
|
uint8_t closed : 1;
|
|
uint8_t reserved : 7;
|
|
} flags;
|
|
struct spdk_sock_impl_opts impl_opts;
|
|
};
|
|
|
|
/**
 * A socket group: a singly-linked tail queue of spdk_sock_group_impl objects
 * plus an opaque context pointer.
 */
struct spdk_sock_group {
	STAILQ_HEAD(, spdk_sock_group_impl)	group_impls;
	void					*ctx;
};
|
|
|
|
/**
 * Per-implementation part of a socket group. It points back to the owning
 * group and holds the list of sockets tracked by this group implementation.
 */
struct spdk_sock_group_impl {
	struct spdk_net_impl			*net_impl;
	struct spdk_sock_group			*group;
	TAILQ_HEAD(, spdk_sock)			socks;
	/* Linkage for spdk_sock_group.group_impls (same element type). */
	STAILQ_ENTRY(spdk_sock_group_impl)	link;
};
|
|
|
|
/**
 * Placement map handled by the spdk_sock_map_*() functions declared in this
 * header: a list of placement-id entries together with a mutex.
 */
struct spdk_sock_map {
	STAILQ_HEAD(, spdk_sock_placement_id_entry)	entries;
	/* NOTE(review): presumably guards `entries`; confirm in the implementation. */
	pthread_mutex_t					mtx;
};
|
|
|
|
/**
 * Function table describing one network (socket) implementation.
 *
 * Member order is kept exactly as in the original definition. The semantics
 * of each callback are defined by the implementations and by the callers in
 * the socket library, not by this header.
 */
struct spdk_net_impl {
	const char *name;
	int priority;

	/* Address query, connection setup/teardown and synchronous I/O. */
	int (*getaddr)(struct spdk_sock *sock, char *saddr, int slen, uint16_t *sport, char *caddr,
		       int clen, uint16_t *cport);
	struct spdk_sock *(*connect)(const char *ip, int port, struct spdk_sock_opts *opts);
	struct spdk_sock *(*listen)(const char *ip, int port, struct spdk_sock_opts *opts);
	struct spdk_sock *(*accept)(struct spdk_sock *sock);
	int (*close)(struct spdk_sock *sock);
	ssize_t (*recv)(struct spdk_sock *sock, void *buf, size_t len);
	ssize_t (*readv)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);
	ssize_t (*writev)(struct spdk_sock *sock, struct iovec *iov, int iovcnt);

	/* Request-based (asynchronous) I/O. */
	void (*writev_async)(struct spdk_sock *sock, struct spdk_sock_request *req);
	void (*readv_async)(struct spdk_sock *sock, struct spdk_sock_request *req);
	int (*flush)(struct spdk_sock *sock);

	/* Socket tuning. */
	int (*set_recvlowat)(struct spdk_sock *sock, int nbytes);
	int (*set_recvbuf)(struct spdk_sock *sock, int sz);
	int (*set_sendbuf)(struct spdk_sock *sock, int sz);

	/* Socket state queries. */
	bool (*is_ipv6)(struct spdk_sock *sock);
	bool (*is_ipv4)(struct spdk_sock *sock);
	bool (*is_connected)(struct spdk_sock *sock);

	/* Group (poll set) management. */
	struct spdk_sock_group_impl *(*group_impl_get_optimal)(struct spdk_sock *sock,
			struct spdk_sock_group_impl *hint);
	struct spdk_sock_group_impl *(*group_impl_create)(void);
	int (*group_impl_add_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
	int (*group_impl_remove_sock)(struct spdk_sock_group_impl *group, struct spdk_sock *sock);
	int (*group_impl_poll)(struct spdk_sock_group_impl *group, int max_events,
			       struct spdk_sock **socks);
	int (*group_impl_close)(struct spdk_sock_group_impl *group);

	/* Implementation-wide option access. */
	int (*get_opts)(struct spdk_sock_impl_opts *opts, size_t *len);
	int (*set_opts)(const struct spdk_sock_impl_opts *opts, size_t len);

	/* List linkage; presumably the list of registered implementations - confirm in sock.c. */
	STAILQ_ENTRY(spdk_net_impl) link;
};
|
|
|
|
/*
 * Register the network implementation `impl` with the given `priority`.
 * Only the declaration lives in this header; SPDK_NET_IMPL_REGISTER() wraps
 * a call to this function in a constructor.
 * NOTE(review): how `priority` orders implementations is not visible here.
 */
void spdk_net_impl_register(struct spdk_net_impl *impl, int priority);
|
|
|
|
/*
 * Define a static constructor function named net_impl_register_<name> that
 * calls spdk_net_impl_register(impl, priority). Because of the `constructor`
 * attribute (GCC/Clang), the call is made automatically before main().
 */
#define SPDK_NET_IMPL_REGISTER(name, impl, priority) \
static void __attribute__((constructor)) net_impl_register_##name(void) \
{ \
	spdk_net_impl_register(impl, priority); \
}
|
|
|
|
static inline void
|
|
spdk_sock_request_queue(struct spdk_sock *sock, struct spdk_sock_request *req)
|
|
{
|
|
assert(req->internal.curr_list == NULL);
|
|
TAILQ_INSERT_TAIL(&sock->queued_reqs, req, internal.link);
|
|
#ifdef DEBUG
|
|
req->internal.curr_list = &sock->queued_reqs;
|
|
#endif
|
|
sock->queued_iovcnt += req->iovcnt;
|
|
}
|
|
|
|
static inline void
|
|
spdk_sock_request_pend(struct spdk_sock *sock, struct spdk_sock_request *req)
|
|
{
|
|
assert(req->internal.curr_list == &sock->queued_reqs);
|
|
TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
|
|
assert(sock->queued_iovcnt >= req->iovcnt);
|
|
sock->queued_iovcnt -= req->iovcnt;
|
|
TAILQ_INSERT_TAIL(&sock->pending_reqs, req, internal.link);
|
|
#ifdef DEBUG
|
|
req->internal.curr_list = &sock->pending_reqs;
|
|
#endif
|
|
}
|
|
|
|
static inline int
|
|
spdk_sock_request_complete(struct spdk_sock *sock, struct spdk_sock_request *req, int err)
|
|
{
|
|
bool closed;
|
|
int rc = 0;
|
|
|
|
req->internal.offset = 0;
|
|
req->internal.is_zcopy = 0;
|
|
|
|
closed = sock->flags.closed;
|
|
sock->cb_cnt++;
|
|
req->cb_fn(req->cb_arg, err);
|
|
assert(sock->cb_cnt > 0);
|
|
sock->cb_cnt--;
|
|
|
|
if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
|
|
/* The user closed the socket in response to a callback above. */
|
|
rc = -1;
|
|
spdk_sock_close(&sock);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
static inline int
|
|
spdk_sock_request_put(struct spdk_sock *sock, struct spdk_sock_request *req, int err)
|
|
{
|
|
assert(req->internal.curr_list == &sock->pending_reqs);
|
|
TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
|
|
#ifdef DEBUG
|
|
req->internal.curr_list = NULL;
|
|
#endif
|
|
return spdk_sock_request_complete(sock, req, err);
|
|
}
|
|
|
|
static inline int
|
|
spdk_sock_abort_requests(struct spdk_sock *sock)
|
|
{
|
|
struct spdk_sock_request *req;
|
|
bool closed;
|
|
int rc = 0;
|
|
|
|
closed = sock->flags.closed;
|
|
sock->cb_cnt++;
|
|
|
|
req = TAILQ_FIRST(&sock->pending_reqs);
|
|
while (req) {
|
|
assert(req->internal.curr_list == &sock->pending_reqs);
|
|
TAILQ_REMOVE(&sock->pending_reqs, req, internal.link);
|
|
#ifdef DEBUG
|
|
req->internal.curr_list = NULL;
|
|
#endif
|
|
|
|
req->cb_fn(req->cb_arg, -ECANCELED);
|
|
|
|
req = TAILQ_FIRST(&sock->pending_reqs);
|
|
}
|
|
|
|
req = TAILQ_FIRST(&sock->queued_reqs);
|
|
while (req) {
|
|
assert(req->internal.curr_list == &sock->queued_reqs);
|
|
TAILQ_REMOVE(&sock->queued_reqs, req, internal.link);
|
|
#ifdef DEBUG
|
|
req->internal.curr_list = NULL;
|
|
#endif
|
|
|
|
assert(sock->queued_iovcnt >= req->iovcnt);
|
|
sock->queued_iovcnt -= req->iovcnt;
|
|
|
|
req->cb_fn(req->cb_arg, -ECANCELED);
|
|
|
|
req = TAILQ_FIRST(&sock->queued_reqs);
|
|
}
|
|
|
|
req = sock->read_req;
|
|
if (req != NULL) {
|
|
sock->read_req = NULL;
|
|
req->cb_fn(req->cb_arg, -ECANCELED);
|
|
}
|
|
assert(sock->cb_cnt > 0);
|
|
sock->cb_cnt--;
|
|
|
|
assert(TAILQ_EMPTY(&sock->queued_reqs));
|
|
assert(TAILQ_EMPTY(&sock->pending_reqs));
|
|
|
|
if (sock->cb_cnt == 0 && !closed && sock->flags.closed) {
|
|
/* The user closed the socket in response to a callback above. */
|
|
rc = -1;
|
|
spdk_sock_close(&sock);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
static inline int
|
|
spdk_sock_prep_req(struct spdk_sock_request *req, struct iovec *iovs, int index,
|
|
uint64_t *num_bytes)
|
|
{
|
|
unsigned int offset;
|
|
int iovcnt, i;
|
|
|
|
assert(index < IOV_BATCH_SIZE);
|
|
offset = req->internal.offset;
|
|
iovcnt = index;
|
|
|
|
for (i = 0; i < req->iovcnt; i++) {
|
|
/* Consume any offset first */
|
|
if (offset >= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len) {
|
|
offset -= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len;
|
|
continue;
|
|
}
|
|
|
|
iovs[iovcnt].iov_base = SPDK_SOCK_REQUEST_IOV(req, i)->iov_base + offset;
|
|
iovs[iovcnt].iov_len = SPDK_SOCK_REQUEST_IOV(req, i)->iov_len - offset;
|
|
if (num_bytes != NULL) {
|
|
*num_bytes += iovs[iovcnt].iov_len;
|
|
}
|
|
|
|
iovcnt++;
|
|
offset = 0;
|
|
|
|
if (iovcnt >= IOV_BATCH_SIZE) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
return iovcnt;
|
|
}
|
|
|
|
static inline int
|
|
spdk_sock_prep_reqs(struct spdk_sock *_sock, struct iovec *iovs, int index,
|
|
struct spdk_sock_request **last_req, int *flags)
|
|
{
|
|
int iovcnt;
|
|
struct spdk_sock_request *req;
|
|
uint64_t total = 0;
|
|
|
|
/* Gather an iov */
|
|
iovcnt = index;
|
|
if (spdk_unlikely(iovcnt >= IOV_BATCH_SIZE)) {
|
|
goto end;
|
|
}
|
|
|
|
if (last_req != NULL && *last_req != NULL) {
|
|
req = TAILQ_NEXT(*last_req, internal.link);
|
|
} else {
|
|
req = TAILQ_FIRST(&_sock->queued_reqs);
|
|
}
|
|
|
|
while (req) {
|
|
iovcnt = spdk_sock_prep_req(req, iovs, iovcnt, &total);
|
|
if (iovcnt >= IOV_BATCH_SIZE) {
|
|
break;
|
|
}
|
|
|
|
if (last_req != NULL) {
|
|
*last_req = req;
|
|
}
|
|
req = TAILQ_NEXT(req, internal.link);
|
|
}
|
|
|
|
end:
|
|
|
|
#if defined(MSG_ZEROCOPY)
|
|
/* if data size < zerocopy_threshold, remove MSG_ZEROCOPY flag */
|
|
if (total < _sock->impl_opts.zerocopy_threshold && flags != NULL) {
|
|
*flags = *flags & (~MSG_ZEROCOPY);
|
|
}
|
|
#endif
|
|
|
|
return iovcnt;
|
|
}
|
|
|
|
static inline void
|
|
spdk_sock_get_placement_id(int fd, enum spdk_placement_mode mode, int *placement_id)
|
|
{
|
|
*placement_id = -1;
|
|
|
|
switch (mode) {
|
|
case PLACEMENT_NONE:
|
|
break;
|
|
case PLACEMENT_MARK:
|
|
case PLACEMENT_NAPI: {
|
|
#if defined(SO_INCOMING_NAPI_ID)
|
|
socklen_t len = sizeof(int);
|
|
|
|
getsockopt(fd, SOL_SOCKET, SO_INCOMING_NAPI_ID, placement_id, &len);
|
|
#endif
|
|
break;
|
|
}
|
|
case PLACEMENT_CPU: {
|
|
#if defined(SO_INCOMING_CPU)
|
|
socklen_t len = sizeof(int);
|
|
|
|
getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, placement_id, &len);
|
|
#endif
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
/**
 * Insert a group into the placement map.
 * If the group is already in the map, take a reference.
 *
 * \param map Placement map to modify.
 * \param placement_id Placement id to associate with the group.
 * \param group_impl Group implementation to insert.
 *
 * \return Status code; the exact values are defined by the implementation,
 * which is not part of this header.
 */
int spdk_sock_map_insert(struct spdk_sock_map *map, int placement_id,
			 struct spdk_sock_group_impl *group_impl);
|
|
|
|
/**
 * Release a reference for the given placement_id. If the reference count goes to 0, the
 * entry will no longer be associated with a group.
 *
 * \param map Placement map to modify.
 * \param placement_id Placement id whose reference is released.
 */
void spdk_sock_map_release(struct spdk_sock_map *map, int placement_id);
|
|
|
|
/**
 * Look up the group for the given placement_id.
 *
 * \param map Placement map to search.
 * \param placement_id Placement id to look up.
 * \param group_impl Output location for the group found.
 * \param hint Group implementation hint; how it is used is defined by the
 * implementation, which is not part of this header.
 *
 * \return Status code; the exact values are defined by the implementation.
 */
int spdk_sock_map_lookup(struct spdk_sock_map *map, int placement_id,
			 struct spdk_sock_group_impl **group_impl, struct spdk_sock_group_impl *hint);
|
|
|
|
/**
 * Find a placement id with no associated group
 *
 * \param map Placement map to search.
 *
 * \return A placement id. The value returned when none is free is defined by
 * the implementation, which is not part of this header.
 */
int spdk_sock_map_find_free(struct spdk_sock_map *map);
|
|
|
|
/**
 * Clean up all memory associated with the given map
 *
 * \param map Placement map to clean up.
 */
void spdk_sock_map_cleanup(struct spdk_sock_map *map);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* SPDK_INTERNAL_SOCK_H */
|