sock: Fix the "sock remove assert bug" in spdk_sock_group_remove_sock

The statement causes this issue is:

assert(group_impl->num_removed_socks < MAX_EVENTS_PER_POLL);

The call trace is:

The previous solution is:
commitid with: e71e81b631

But with this solution, it will always add the sock
into the removed_socks list even if it is not under polling
context by sock_group_impl_poll_count. So it will exceed the size of
removed_socks array if sock_group_impl_poll_count function will not be
called. And we should not use a large array, because it is just a workaround,
it just hides the bug.

So our current solution is:

1 Remove the code in sock layer, i.e., rollback the commit
 e71e81b631. This patch is
not the right fix. The sock->cb_fn's NULL pointer case is
caused by the cb_fn of write operation (if the
spdk_sock_group_remove_sock is inside the cb_fn). And it is not
caused by the epoll related cache issue described in commit
"e7181.." commit, but caused by the following situation:

(1)The socket's cb_fn is set to NULL which is caused by
spdk_sock_group_remove_sock by the socket itself
inside a call back function from a write operation.

(2) And the socket is already in the pending_recv list. It is
not caused by the epoll event issue, e.g., socket A changes Socket B's
cb_fn. By the way, A socket A should never remove a socket B from a polling group.
If it really does it, it should use spdk_thread_sendmsg to make sure
it happens in the next round.

2 Add the code check in each posix, uring implementation module.
If sock->cb_fn is NULL, we will not return the socket to the active socks list.
And this is enough to address the issue.

Signed-off-by: Ziye Yang <ziye.yang@intel.com>
Change-Id: I79187f2f1301c819c46a5c3bdd84372f75534f2f
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6472
Reviewed-by: Xiaodong Liu <xiaodong.liu@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
Ziye Yang 2021-02-19 20:02:07 +08:00 committed by Tomasz Zawadzki
parent 9d79d27e49
commit d5cd0b13b6
4 changed files with 18 additions and 26 deletions

View File

@ -82,12 +82,6 @@ struct spdk_sock_group_impl {
struct spdk_net_impl *net_impl;
TAILQ_HEAD(, spdk_sock) socks;
STAILQ_ENTRY(spdk_sock_group_impl) link;
/* List of removed sockets. refreshed each time we poll the sock group. */
int num_removed_socks;
/* Unfortunately, we can't just keep a tailq of the sockets in case they are freed
* or added to another poll group later.
*/
uintptr_t removed_socks[MAX_EVENTS_PER_POLL];
};
struct spdk_net_impl {

View File

@ -493,7 +493,6 @@ spdk_sock_group_create(void *ctx)
if (group_impl != NULL) {
STAILQ_INSERT_TAIL(&group->group_impls, group_impl, link);
TAILQ_INIT(&group_impl->socks);
group_impl->num_removed_socks = 0;
group_impl->net_impl = impl;
}
}
@ -590,9 +589,6 @@ spdk_sock_group_remove_sock(struct spdk_sock_group *group, struct spdk_sock *soc
rc = group_impl->net_impl->group_impl_remove_sock(group_impl, sock);
if (rc == 0) {
TAILQ_REMOVE(&group_impl->socks, sock, link);
assert(group_impl->num_removed_socks < MAX_EVENTS_PER_POLL);
group_impl->removed_socks[group_impl->num_removed_socks] = (uintptr_t)sock;
group_impl->num_removed_socks++;
sock->group_impl = NULL;
sock->cb_fn = NULL;
sock->cb_arg = NULL;
@ -619,9 +615,6 @@ sock_group_impl_poll_count(struct spdk_sock_group_impl *group_impl,
return 0;
}
/* The number of removed sockets should be reset for each call to poll. */
group_impl->num_removed_socks = 0;
num_events = group_impl->net_impl->group_impl_poll(group_impl, max_events, socks);
if (num_events == -1) {
return -1;
@ -629,19 +622,8 @@ sock_group_impl_poll_count(struct spdk_sock_group_impl *group_impl,
for (i = 0; i < num_events; i++) {
struct spdk_sock *sock = socks[i];
int j;
bool valid = true;
for (j = 0; j < group_impl->num_removed_socks; j++) {
if ((uintptr_t)sock == group_impl->removed_socks[j]) {
valid = false;
break;
}
}
if (valid) {
assert(sock->cb_fn != NULL);
sock->cb_fn(sock->cb_arg, group, sock);
}
assert(sock->cb_fn != NULL);
sock->cb_fn(sock->cb_arg, group, sock);
}
return num_events;

View File

@ -1292,6 +1292,14 @@ posix_sock_group_impl_poll(struct spdk_sock_group_impl *_group, int max_events,
break;
}
/* If the socket's cb_fn is NULL, just remove it from the
* list and do not add it to socks array */
if (spdk_unlikely(psock->base.cb_fn == NULL)) {
psock->pending_recv = false;
TAILQ_REMOVE(&group->pending_recv, psock, link);
continue;
}
socks[num_events++] = &psock->base;
}

View File

@ -919,6 +919,14 @@ sock_uring_group_reap(struct spdk_uring_sock_group_impl *group, int max, int max
break;
}
/* If the socket's cb_fn is NULL, just remove it from
* the list and do not add it to socks array */
if (spdk_unlikely(sock->base.cb_fn == NULL)) {
sock->pending_recv = false;
TAILQ_REMOVE(&group->pending_recv, sock, link);
continue;
}
socks[count++] = &sock->base;
}