sock: Fix the "sock remove assert bug" in spdk_sock_group_remove_sock
The statement causes this issue is: assert(group_impl->num_removed_socks < MAX_EVENTS_PER_POLL); The call trace is: The previous solution is: commitid with:e71e81b631
But with this solution, it will always add the sock into the removed_socks list even if it is not under polling context by sock_group_impl_poll_count. So it will exceed the size of removed_socks array if sock_group_impl_poll_count function will not be called. And we should not use a large array, because it is just a workaround, it just hides the bug. So our current solution is: 1 Remove the code in sock layer, i.e., rollback the commite71e81b631
. This patch is not the right fix. The sock->cb_fn's NULL pointer case is caused by the cb_fn of write operation (if the spdk_sock_group_remove_sock is inside the cb_fn). And it is not caused by the epoll related cache issue described in commit "e7181.." commit, but caused by the following situation: (1)The socket's cb_fn is set to NULL which is caused by spdk_sock_group_remove_sock by the socket itself inside a call back function from a write operation. (2) And the socket is already in the pending_recv list. It is not caused by the epoll event issue, e.g., socket A changes Socket B's cb_fn. By the way, A socket A should never remove a socket B from a polling group. If it really does it, it should use spdk_thread_sendmsg to make sure it happens in the next round. 2 Add the code check in each posix, uring implementation module. If sock->cb_fn is NULL, we will not return the socket to the active socks list. And this is enough to address the issue. Signed-off-by: Ziye Yang <ziye.yang@intel.com> Change-Id: I79187f2f1301c819c46a5c3bdd84372f75534f2f Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6472 Reviewed-by: Xiaodong Liu <xiaodong.liu@intel.com> Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
parent
9d79d27e49
commit
d5cd0b13b6
@ -82,12 +82,6 @@ struct spdk_sock_group_impl {
|
||||
struct spdk_net_impl *net_impl;
|
||||
TAILQ_HEAD(, spdk_sock) socks;
|
||||
STAILQ_ENTRY(spdk_sock_group_impl) link;
|
||||
/* List of removed sockets. refreshed each time we poll the sock group. */
|
||||
int num_removed_socks;
|
||||
/* Unfortunately, we can't just keep a tailq of the sockets in case they are freed
|
||||
* or added to another poll group later.
|
||||
*/
|
||||
uintptr_t removed_socks[MAX_EVENTS_PER_POLL];
|
||||
};
|
||||
|
||||
struct spdk_net_impl {
|
||||
|
@ -493,7 +493,6 @@ spdk_sock_group_create(void *ctx)
|
||||
if (group_impl != NULL) {
|
||||
STAILQ_INSERT_TAIL(&group->group_impls, group_impl, link);
|
||||
TAILQ_INIT(&group_impl->socks);
|
||||
group_impl->num_removed_socks = 0;
|
||||
group_impl->net_impl = impl;
|
||||
}
|
||||
}
|
||||
@ -590,9 +589,6 @@ spdk_sock_group_remove_sock(struct spdk_sock_group *group, struct spdk_sock *soc
|
||||
rc = group_impl->net_impl->group_impl_remove_sock(group_impl, sock);
|
||||
if (rc == 0) {
|
||||
TAILQ_REMOVE(&group_impl->socks, sock, link);
|
||||
assert(group_impl->num_removed_socks < MAX_EVENTS_PER_POLL);
|
||||
group_impl->removed_socks[group_impl->num_removed_socks] = (uintptr_t)sock;
|
||||
group_impl->num_removed_socks++;
|
||||
sock->group_impl = NULL;
|
||||
sock->cb_fn = NULL;
|
||||
sock->cb_arg = NULL;
|
||||
@ -619,9 +615,6 @@ sock_group_impl_poll_count(struct spdk_sock_group_impl *group_impl,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The number of removed sockets should be reset for each call to poll. */
|
||||
group_impl->num_removed_socks = 0;
|
||||
|
||||
num_events = group_impl->net_impl->group_impl_poll(group_impl, max_events, socks);
|
||||
if (num_events == -1) {
|
||||
return -1;
|
||||
@ -629,20 +622,9 @@ sock_group_impl_poll_count(struct spdk_sock_group_impl *group_impl,
|
||||
|
||||
for (i = 0; i < num_events; i++) {
|
||||
struct spdk_sock *sock = socks[i];
|
||||
int j;
|
||||
bool valid = true;
|
||||
for (j = 0; j < group_impl->num_removed_socks; j++) {
|
||||
if ((uintptr_t)sock == group_impl->removed_socks[j]) {
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid) {
|
||||
assert(sock->cb_fn != NULL);
|
||||
sock->cb_fn(sock->cb_arg, group, sock);
|
||||
}
|
||||
}
|
||||
|
||||
return num_events;
|
||||
}
|
||||
|
@ -1292,6 +1292,14 @@ posix_sock_group_impl_poll(struct spdk_sock_group_impl *_group, int max_events,
|
||||
break;
|
||||
}
|
||||
|
||||
/* If the socket's cb_fn is NULL, just remove it from the
|
||||
* list and do not add it to socks array */
|
||||
if (spdk_unlikely(psock->base.cb_fn == NULL)) {
|
||||
psock->pending_recv = false;
|
||||
TAILQ_REMOVE(&group->pending_recv, psock, link);
|
||||
continue;
|
||||
}
|
||||
|
||||
socks[num_events++] = &psock->base;
|
||||
}
|
||||
|
||||
|
@ -919,6 +919,14 @@ sock_uring_group_reap(struct spdk_uring_sock_group_impl *group, int max, int max
|
||||
break;
|
||||
}
|
||||
|
||||
/* If the socket's cb_fn is NULL, just remove it from
|
||||
* the list and do not add it to socks array */
|
||||
if (spdk_unlikely(sock->base.cb_fn == NULL)) {
|
||||
sock->pending_recv = false;
|
||||
TAILQ_REMOVE(&group->pending_recv, sock, link);
|
||||
continue;
|
||||
}
|
||||
|
||||
socks[count++] = &sock->base;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user