From 4045068a329324884c4337ab42b323bf0c6a8438 Mon Sep 17 00:00:00 2001 From: Alexey Marchuk Date: Tue, 21 Mar 2023 19:53:58 +0100 Subject: [PATCH] lib/nvmf: Defer port removal while qpairs exist in poll group The following heap-use-after-free may happen when RDMA listener is removed: 1. At least 2 listeners exist, at least 1 qpair is created on each listening port 2. Listener A is removed, in nvmf_stop_listen_disconnect_qpairs we iterate all qpair (let's say A1 and B1) and we check if qpair's source trid matches listener's trid by calling nvmf_transport_qpair_get_listen_trid. Trid is retrieved from qpair->listen_id which points to the listener A cmid. Assume that qpair's A1 trid matches, A1 starts the disconnect process 3. After iterating all qpairs on step 2 we switch to the next IO channel and then complete port removal on RDMA transport layer where we destroy cmid of the listener A 4. Qpair A1 still has IO submitted to bdev, destruction is postponed 5. Listener B is removed, in nvmf_stop_listen_disconnect_qpairs we iterate all qpairs (A1 and B1) and try to check A1's listen trid. But listener A is already destroyed, so RDMA qpair->listen_id points to freed memory chunk To fix this issue, nvmf_stop_listen_disconnect_qpairs was modified to ensure that no qpairs with listen_trid == removed_trid exist before destroying the listener. Fixes issue #2948 Signed-off-by: Alexey Marchuk Change-Id: Iba263981ff02726f0c850bea90264118289e500c Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17287 Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Jim Harris --- lib/nvmf/transport.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/nvmf/transport.c b/lib/nvmf/transport.c index bf3b61476..75895fd90 100644 --- a/lib/nvmf/transport.c +++ b/lib/nvmf/transport.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * Copyright (C) 2016 Intel Corporation. All rights reserved. * Copyright (c) 2018-2019, 2021 Mellanox Technologies LTD. All rights reserved. + * Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #include "spdk/stdinc.h" @@ -469,6 +470,14 @@ nvmf_stop_listen_fini(struct spdk_io_channel_iter *i, int status) free(ctx); } +static void nvmf_stop_listen_disconnect_qpairs(struct spdk_io_channel_iter *i); + +static void +nvmf_stop_listen_disconnect_qpairs_msg(void *ctx) +{ + nvmf_stop_listen_disconnect_qpairs((struct spdk_io_channel_iter *)ctx); +} + static void nvmf_stop_listen_disconnect_qpairs(struct spdk_io_channel_iter *i) { @@ -477,6 +486,7 @@ nvmf_stop_listen_disconnect_qpairs(struct spdk_io_channel_iter *i) struct spdk_io_channel *ch; struct spdk_nvmf_qpair *qpair, *tmp_qpair; struct spdk_nvme_transport_id tmp_trid; + bool qpair_found = false; ctx = spdk_io_channel_iter_get_ctx(i); ch = spdk_io_channel_iter_get_channel(i); @@ -492,9 +502,15 @@ nvmf_stop_listen_disconnect_qpairs(struct spdk_io_channel_iter *i) if (ctx->subsystem == NULL || qpair->ctrlr == NULL || ctx->subsystem == qpair->ctrlr->subsys) { spdk_nvmf_qpair_disconnect(qpair, NULL, NULL); + qpair_found = true; } } } + if (qpair_found) { + spdk_thread_send_msg(spdk_get_thread(), nvmf_stop_listen_disconnect_qpairs_msg, i); + return; + } + spdk_for_each_channel_continue(i, 0); }