From b240c2b103d2e26093abaf8a9fa3309e71fd62a6 Mon Sep 17 00:00:00 2001 From: Mike Gerdts Date: Mon, 1 May 2023 13:02:47 -0500 Subject: [PATCH] lvol: lvol destruction race leads to null deref As an lvolstore is being destroyed, _vbdev_lvs_remove() starts an interation through the lvols to delete each one, ultimately leading to the destruction of the lvolstore with a call to lvs_free(). The callback passed to vbdev_lvs_destruct() is always called asynchronously via spdk_io_device_unregister() in bs_free(). When the lvolstore resides on bdevs that perform async IO (i.e. most bdevs other than malloc), this gives a small window when the lvol bdev is not registered but a lookup with spdk_lvol_get_by_uuid() or spdk_lvol_get_by_names() will succeed. If rpc_bdev_lvol_delete() runs during this window, it can get a reference to an lvol that has just been unregistered and lvol->blob may be NULL. This lvol is then passed to vbdev_lvol_destroy(). Before this fix, vbdev_lvol_destroy() would call: spdk_blob_is_degraded(lvol->blob); Which would then lead to a NULL pointer dereference, as spdk_blob_is_degraded() assumes a valid blob is passed. While a NULL check would avoid this particular problem, a NULL blob is not necessarily caused by the condition described above. It would better to flag the lvstore's destruction before returning from vbdev_lvs_destruct() and use that flag to prevent operations on the lvolstore that is being deleted. Such a flag already exists in the form of 'lvs_bdev->req != NULL', but that is set too late to close this race. This fix introduces lvs_bdev->removal_in_progress which is set prior to returning from vbdev_lvs_unload() and vbdev_lvs_destruct(). It is checked by vbdev_lvol_destroy() before trying to destroy the lvol. Now, any lvol destruction initiated by something other than vbdev_lvs_destruct() while an lvolstore unload or destroy is in progress will fail with -ENODEV. Fixes issue: #2998 Signed-off-by: Mike Gerdts Change-Id: I4d861879097703b0d8e3180e6de7ad6898f340fd Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17891 Community-CI: Mellanox Build Bot Reviewed-by: Ben Walker Reviewed-by: Jim Harris Tested-by: SPDK CI Jenkins --- module/bdev/lvol/vbdev_lvol.c | 49 +++++++++++++++++--- module/bdev/lvol/vbdev_lvol.h | 1 + test/lvol/esnap/esnap.c | 86 +++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 7 deletions(-) diff --git a/module/bdev/lvol/vbdev_lvol.c b/module/bdev/lvol/vbdev_lvol.c index c164c08cf..cb439e634 100644 --- a/module/bdev/lvol/vbdev_lvol.c +++ b/module/bdev/lvol/vbdev_lvol.c @@ -41,6 +41,8 @@ struct spdk_bdev_module g_lvol_if = { SPDK_BDEV_MODULE_REGISTER(lvol, &g_lvol_if) +static void _vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg); + struct lvol_store_bdev * vbdev_get_lvs_bdev_by_lvs(struct spdk_lvol_store *lvs_orig) { @@ -50,8 +52,11 @@ vbdev_get_lvs_bdev_by_lvs(struct spdk_lvol_store *lvs_orig) while (lvs_bdev != NULL) { lvs = lvs_bdev->lvs; if (lvs == lvs_orig) { - if (lvs_bdev->req != NULL) { - /* We do not allow access to lvs that are being destroyed */ + if (lvs_bdev->removal_in_progress) { + /* We do not allow access to lvs that are being unloaded or + * destroyed */ + SPDK_DEBUGLOG(vbdev_lvol, "lvs %s: removal in progress\n", + lvs_orig->name); return NULL; } else { return lvs_bdev; @@ -120,8 +125,11 @@ vbdev_get_lvs_bdev_by_bdev(struct spdk_bdev *bdev_orig) while (lvs_bdev != NULL) { if (lvs_bdev->bdev == bdev_orig) { - if (lvs_bdev->req != NULL) { - /* We do not allow access to lvs that are being destroyed */ + if (lvs_bdev->removal_in_progress) { + /* We do not allow access to lvs that are being unloaded or + * destroyed */ + SPDK_DEBUGLOG(vbdev_lvol, "lvs %s: removal in progress\n", + lvs_bdev->lvs->name); return NULL; } else { return lvs_bdev; @@ -359,7 +367,7 @@ _vbdev_lvs_remove_lvol_cb(void *cb_arg, int lvolerrno) lvol = TAILQ_FIRST(&lvs->lvols); while (lvol != NULL) { if (spdk_lvol_deletable(lvol)) { - vbdev_lvol_destroy(lvol, _vbdev_lvs_remove_lvol_cb, lvs_bdev); + _vbdev_lvol_destroy(lvol, _vbdev_lvs_remove_lvol_cb, lvs_bdev); return; } lvol = TAILQ_NEXT(lvol, link); @@ -425,6 +433,8 @@ _vbdev_lvs_remove(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void return; } + lvs_bdev->removal_in_progress = true; + req->cb_fn = cb_fn; req->cb_arg = cb_arg; lvs_bdev->req = req; @@ -611,8 +621,8 @@ _vbdev_lvol_destroy_cb(void *cb_arg, int bdeverrno) free(ctx); } -void -vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg) +static void +_vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg) { struct vbdev_lvol_destroy_ctx *ctx; size_t count; @@ -620,6 +630,10 @@ vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb assert(lvol != NULL); assert(cb_fn != NULL); + /* Callers other than _vbdev_lvs_remove() must ensure the lvstore is not being removed. */ + assert(cb_fn == _vbdev_lvs_remove_lvol_cb || + vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store) != NULL); + /* Check if it is possible to delete lvol */ spdk_blob_get_clones(lvol->lvol_store->blobstore, lvol->blob_id, NULL, &count); if (count > 1) { @@ -647,6 +661,26 @@ vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb spdk_bdev_unregister(lvol->bdev, _vbdev_lvol_destroy_cb, ctx); } +void +vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg) +{ + struct lvol_store_bdev *lvs_bdev; + + /* + * During destruction of an lvolstore, _vbdev_lvs_unload() iterates through lvols until they + * are all deleted. There may be some IO required + */ + lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store); + if (lvs_bdev == NULL) { + SPDK_DEBUGLOG(vbdev_lvol, "lvol %s: lvolstore is being removed\n", + lvol->unique_id); + cb_fn(cb_arg, -ENODEV); + return; + } + + _vbdev_lvol_destroy(lvol, cb_fn, cb_arg); +} + static char * vbdev_lvol_find_name(struct spdk_lvol *lvol, spdk_blob_id blob_id) { @@ -1086,6 +1120,7 @@ _create_lvol_disk(struct spdk_lvol *lvol, bool destroy) lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store); if (lvs_bdev == NULL) { SPDK_ERRLOG("No spdk lvs-bdev pair found for lvol %s\n", lvol->unique_id); + assert(false); return -ENODEV; } diff --git a/module/bdev/lvol/vbdev_lvol.h b/module/bdev/lvol/vbdev_lvol.h index 18da7ffc6..19129f47f 100644 --- a/module/bdev/lvol/vbdev_lvol.h +++ b/module/bdev/lvol/vbdev_lvol.h @@ -16,6 +16,7 @@ struct lvol_store_bdev { struct spdk_lvol_store *lvs; struct spdk_bdev *bdev; struct spdk_lvs_req *req; + bool removal_in_progress; TAILQ_ENTRY(lvol_store_bdev) lvol_stores; }; diff --git a/test/lvol/esnap/esnap.c b/test/lvol/esnap/esnap.c index 998649081..f8db39409 100644 --- a/test/lvol/esnap/esnap.c +++ b/test/lvol/esnap/esnap.c @@ -672,6 +672,91 @@ esnap_remove_degraded(void) CU_ASSERT(rc == 0); } +static void +late_delete(void) +{ + char aiopath[PATH_MAX]; + struct spdk_lvol_store *lvs = NULL; + const uint32_t bs_size_bytes = 10 * 1024 * 1024; + const uint32_t bs_block_size = 4096; + const uint32_t cluster_size = 32 * 1024; + struct op_with_handle_data owh_data; + struct lvol_bdev *lvol_bdev; + struct spdk_lvol *lvol; + int rc, rc2; + int count; + + /* Create device for lvstore. This cannot be a malloc bdev because we need to sneak a + * deletion in while blob_persist() is in progress. + */ + rc = make_test_file(bs_size_bytes, aiopath, sizeof(aiopath), "late_delete.aio"); + SPDK_CU_ASSERT_FATAL(rc == 0); + rc = create_aio_bdev("aio1", aiopath, bs_block_size, false); + SPDK_CU_ASSERT_FATAL(rc == 0); + poll_threads(); + + /* Create lvstore */ + rc = vbdev_lvs_create("aio1", "lvs1", cluster_size, 0, 0, + lvs_op_with_handle_cb, clear_owh(&owh_data)); + SPDK_CU_ASSERT_FATAL(rc == 0); + poll_error_updated(&owh_data.lvserrno); + SPDK_CU_ASSERT_FATAL(owh_data.lvserrno == 0); + SPDK_CU_ASSERT_FATAL(owh_data.u.lvs != NULL); + lvs = owh_data.u.lvs; + + /* Create an lvol */ + vbdev_lvol_create(lvs, "lvol", 1, true, LVOL_CLEAR_WITH_DEFAULT, + lvol_op_with_handle_cb, clear_owh(&owh_data)); + poll_error_updated(&owh_data.lvserrno); + CU_ASSERT(owh_data.lvserrno == 0); + CU_ASSERT(owh_data.u.lvol != NULL); + + /* Verify the lvol can be found both ways */ + CU_ASSERT(spdk_bdev_get_by_name("lvs1/lvol") != NULL); + CU_ASSERT(spdk_lvol_get_by_names("lvs1", "lvol") != NULL); + + /* + * Once the lvolstore deletion starts, it will briefly be possible to look up lvol bdevs and + * degraded lvols in that lvolstore but any attempt to delete them will fail with -ENODEV. + */ + rc = 0xbad; + count = 0; + vbdev_lvs_destruct(lvs, lvol_op_complete_cb, &rc); + do { + lvol_bdev = (struct lvol_bdev *)spdk_bdev_get_by_name("lvs1/lvol"); + if (lvol_bdev != NULL) { + rc2 = 0xbad; + vbdev_lvol_destroy(lvol_bdev->lvol, lvol_op_complete_cb, &rc2); + CU_ASSERT(rc2 == -ENODEV); + } + lvol = spdk_lvol_get_by_names("lvs1", "lvol"); + if (lvol != NULL) { + /* If we are here, we are likely reproducing #2998 */ + rc2 = 0xbad; + vbdev_lvol_destroy(lvol, lvol_op_complete_cb, &rc2); + CU_ASSERT(rc2 == -ENODEV); + count++; + } + + spdk_thread_poll(g_ut_threads[0].thread, 0, 0); + } while (rc == 0xbad); + + /* Ensure that the test exercised the race */ + CU_ASSERT(count != 0); + + /* The lvol is now gone */ + CU_ASSERT(spdk_bdev_get_by_name("lvs1/lvol") == NULL); + CU_ASSERT(spdk_lvol_get_by_names("lvs1", "lvol") == NULL); + + /* Clean up */ + rc = 0xbad; + bdev_aio_delete("aio1", unregister_cb, &rc); + poll_error_updated(&rc); + CU_ASSERT(rc == 0); + rc = unlink(aiopath); + CU_ASSERT(rc == 0); +} + static void bdev_init_cb(void *arg, int rc) { @@ -706,6 +791,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, esnap_clone_io); CU_ADD_TEST(suite, esnap_hotplug); CU_ADD_TEST(suite, esnap_remove_degraded); + CU_ADD_TEST(suite, late_delete); allocate_threads(2); set_thread(0);