From ba91ffbae1aa7597088dd2daa6319a6ccbb70e13 Mon Sep 17 00:00:00 2001
From: Mike Gerdts
Date: Thu, 6 Oct 2022 14:30:22 -0500
Subject: [PATCH] blob: defer unload until channel destroy done

As the blobstore is being unloaded, async esnap channel destructions
may be in flight. In such a case, spdk_bs_unload() needs to defer the
unload of the blobstore until channel destructions are complete.

The following commands lead to the illustrated states.

  bdev_malloc_create -b malloc0
  bdev_lvol_clone_bdev lvs1 malloc0 eclone

   .---------.   .--------.
   | malloc0 |<--| eclone |
   `---------'   `--------'

  bdev_lvol_snapshot lvs1/eclone snap

   .---------.   .------.   .--------.
   | malloc0 |<--| snap |<--| eclone |
   `---------'   `------'   `--------'

  bdev_lvol_clone lvs1/snap clone

                              .--------.
                            ,-| eclone |
   .---------.   .------.<-' `--------'
   | malloc0 |<--| snap |
   `---------'   `------'<-. .-------.
                            `-| clone |
                              `-------'

As the blobstore is preparing to be unloaded, spdk_blob_close() is
called once for eclone, once for clone, and once for snap. The last of
these calls happens just before spdk_bs_unload() is called.
spdk_blob_close() needs to destroy esnap channels on each thread.
During this thread iteration, spdk_bs_unload() starts. The work
performed in the iteration maintains a reference to the blob, and as
such spdk_bs_unload() cannot do its work until the iteration is
complete.
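
The shape of the race is easiest to see in the new unit test, condensed
below. All three closes and the unload are issued before any
completions run, so the unload request arrives while the esnap channel
iteration started by closing eclone is still in flight. (The rc_* names
are illustrative; blob_op_complete() and poll_threads() are unit test
helpers.)

  /* Closing the esnap clone starts per-thread channel destruction,
   * so esnap_channels_unloading is nonzero when spdk_bs_unload()
   * runs and the unload is deferred. */
  spdk_blob_close(eclone, blob_op_complete, &rc_eclone);
  spdk_blob_close(snap, blob_op_complete, &rc_snap);
  spdk_blob_close(clone, blob_op_complete, &rc_clone);
  spdk_bs_unload(bs, blob_op_complete, &rc_unload);
  poll_threads();
  /* blob_esnap_destroy_channels_done() re-invokes spdk_bs_unload()
   * with the saved callback once the last channel is destroyed. */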
Change-Id: Id9b92ad73341fb3437441146110055c84ee6dc52
Signed-off-by: Mike Gerdts
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14975
Tested-by: SPDK CI Jenkins
Reviewed-by: Jim Harris
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker
---
 lib/blob/blobstore.c                |  34 ++++++-
 lib/blob/blobstore.h                |   8 ++
 test/unit/lib/blob/blob.c/blob_ut.c | 136 ++++++++++++++++++++++++++++
 3 files changed, 177 insertions(+), 1 deletion(-)

diff --git a/lib/blob/blobstore.c b/lib/blob/blobstore.c
index a6d8c728f..0ae583f95 100644
--- a/lib/blob/blobstore.c
+++ b/lib/blob/blobstore.c
@@ -5586,6 +5586,30 @@ spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_a
 	SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
 
+	/*
+	 * If external snapshot channels are being destroyed while the blobstore is unloaded, the
+	 * unload is deferred until after the channel destruction completes.
+	 */
+	if (bs->esnap_channels_unloading != 0) {
+		if (bs->esnap_unload_cb_fn != NULL) {
+			SPDK_ERRLOG("Blobstore unload in progress\n");
+			cb_fn(cb_arg, -EBUSY);
+			return;
+		}
+		SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
+			      " esnap clones are unloading\n", bs->esnap_channels_unloading);
+		bs->esnap_unload_cb_fn = cb_fn;
+		bs->esnap_unload_cb_arg = cb_arg;
+		return;
+	}
+	if (bs->esnap_unload_cb_fn != NULL) {
+		SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
+		assert(bs->esnap_unload_cb_fn == cb_fn);
+		assert(bs->esnap_unload_cb_arg == cb_arg);
+		bs->esnap_unload_cb_fn = NULL;
+		bs->esnap_unload_cb_arg = NULL;
+	}
+
 	if (!RB_EMPTY(&bs->open_blobs)) {
 		SPDK_ERRLOG("Blobstore still has open blobs\n");
 		cb_fn(cb_arg, -EBUSY);
@@ -7977,7 +8001,8 @@ blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
 		return;
 	}
 
-	SPDK_DEBUGLOG(blob_esnap, "blob %" PRIx64 ": closed, syncing metadata\n", blob->id);
+	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
+		      blob->id, spdk_thread_get_name(spdk_get_thread()));
 
 	/* Sync metadata */
 	blob_persist(seq, blob, blob_close_cpl, blob);
@@ -8842,6 +8867,7 @@ blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
 {
 	struct blob_esnap_destroy_ctx	*ctx = spdk_io_channel_iter_get_ctx(i);
 	struct spdk_blob		*blob = ctx->blob;
+	struct spdk_blob_store		*bs = blob->bs;
 
 	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
 		      blob->id);
@@ -8850,6 +8876,11 @@ blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
 		ctx->cb_fn(ctx->cb_arg, blob, status);
 	}
 	free(ctx);
+
+	bs->esnap_channels_unloading--;
+	if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
+		spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
+	}
 }
 
 static void
@@ -8910,6 +8941,7 @@ blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, spdk_blob_op_with_han
 
 	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
 		      blob->id);
+	blob->bs->esnap_channels_unloading++;
 	spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
 			      blob_esnap_destroy_channels_done);
 }
diff --git a/lib/blob/blobstore.h b/lib/blob/blobstore.h
index da04cab7a..ab31de29b 100644
--- a/lib/blob/blobstore.h
+++ b/lib/blob/blobstore.h
@@ -189,6 +189,14 @@ struct spdk_blob_store {
 
 	spdk_bs_esnap_dev_create esnap_bs_dev_create;
 	void				*esnap_ctx;
+
+	/* If external snapshot channels are being destroyed while
+	 * the blobstore is unloaded, the unload is deferred until
+	 * after the channel destruction completes.
+	 */
+	uint32_t			esnap_channels_unloading;
+	spdk_bs_op_complete		esnap_unload_cb_fn;
+	void				*esnap_unload_cb_arg;
 };
 
 struct spdk_bs_channel {
diff --git a/test/unit/lib/blob/blob.c/blob_ut.c b/test/unit/lib/blob/blob.c/blob_ut.c
index 62c90e59b..609de1e5b 100644
--- a/test/unit/lib/blob/blob.c/blob_ut.c
+++ b/test/unit/lib/blob/blob.c/blob_ut.c
@@ -167,6 +167,11 @@ bs_op_with_handle_complete(void *cb_arg, struct spdk_blob_store *bs,
 static void
 blob_op_complete(void *cb_arg, int bserrno)
 {
+	if (cb_arg != NULL) {
+		int *errp = cb_arg;
+
+		*errp = bserrno;
+	}
 	g_bserrno = bserrno;
 }
 
@@ -7577,6 +7582,136 @@ blob_esnap_create(void)
 	g_blob = NULL;
 }
 
+static void
+blob_esnap_clone_reload(void)
+{
+	struct spdk_blob_store	*bs = g_bs;
+	struct spdk_bs_opts	bs_opts;
+	struct ut_esnap_opts	esnap_opts;
+	struct spdk_blob_opts	opts;
+	struct spdk_blob	*eclone1, *snap1, *clone1;
+	uint32_t		cluster_sz = spdk_bs_get_cluster_size(bs);
+	uint32_t		block_sz = spdk_bs_get_io_unit_size(bs);
+	const uint32_t		esnap_num_clusters = 4;
+	uint64_t		esnap_num_blocks = cluster_sz * esnap_num_clusters / block_sz;
+	spdk_blob_id		eclone1_id, snap1_id, clone1_id;
+	struct spdk_io_channel	*bs_ch;
+	char			buf[block_sz];
+	int			bserr1, bserr2, bserr3, bserr4;
+	struct spdk_bs_dev	*dev;
+
+	/* Create and open an esnap clone blob */
+	ut_spdk_blob_opts_init(&opts);
+	ut_esnap_opts_init(block_sz, esnap_num_blocks, __func__, NULL, &esnap_opts);
+	opts.esnap_id = &esnap_opts;
+	opts.esnap_id_len = sizeof(esnap_opts);
+	opts.num_clusters = esnap_num_clusters;
+	eclone1 = ut_blob_create_and_open(bs, &opts);
+	CU_ASSERT(eclone1 != NULL);
+	CU_ASSERT(spdk_blob_is_esnap_clone(eclone1));
+	eclone1_id = eclone1->id;
+
+	/* Create and open a snapshot of eclone1 */
+	spdk_bs_create_snapshot(bs, eclone1_id, NULL, blob_op_with_id_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
+	CU_ASSERT(g_bserrno == 0);
+	snap1_id = g_blobid;
+	spdk_bs_open_blob(bs, snap1_id, blob_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	CU_ASSERT(g_blob != NULL);
+	snap1 = g_blob;
+
+	/* Create and open a regular clone of snap1 */
+	spdk_bs_create_clone(bs, snap1_id, NULL, blob_op_with_id_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
+	SPDK_CU_ASSERT_FATAL(g_bserrno == 0);
+	clone1_id = g_blobid;
+	spdk_bs_open_blob(bs, clone1_id, blob_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	CU_ASSERT(g_blob != NULL);
+	clone1 = g_blob;
+
+	/* Close the blobs in preparation for reloading the blobstore */
+	spdk_blob_close(clone1, blob_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	spdk_blob_close(snap1, blob_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	spdk_blob_close(eclone1, blob_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	g_blob = NULL;
+
+	/* Reload the blobstore */
+	spdk_bs_opts_init(&bs_opts, sizeof(bs_opts));
+	bs_opts.esnap_bs_dev_create = ut_esnap_create;
+	ut_bs_reload(&bs, &bs_opts);
+
+	/* Be sure each of the blobs can be opened */
+	spdk_bs_open_blob(bs, eclone1_id, blob_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	CU_ASSERT(g_blob != NULL);
+	eclone1 = g_blob;
+	spdk_bs_open_blob(bs, snap1_id, blob_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	CU_ASSERT(g_blob != NULL);
+	snap1 = g_blob;
+	spdk_bs_open_blob(bs, clone1_id, blob_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	CU_ASSERT(g_blob != NULL);
+	clone1 = g_blob;
+
+	/* Perform some reads on each of them to cause channels to be allocated */
+	bs_ch = spdk_bs_alloc_io_channel(bs);
+	CU_ASSERT(bs_ch != NULL);
+	spdk_blob_io_read(eclone1, bs_ch, buf, 0, 1, bs_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	spdk_blob_io_read(snap1, bs_ch, buf, 0, 1, bs_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	spdk_blob_io_read(clone1, bs_ch, buf, 0, 1, bs_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+
+	/*
+	 * Unload the blobstore in a way similar to how lvstore unloads it. This should exercise
+	 * the deferred unload path in spdk_bs_unload().
+	 */
+	bserr1 = 0xbad;
+	bserr2 = 0xbad;
+	bserr3 = 0xbad;
+	bserr4 = 0xbad;
+	spdk_blob_close(eclone1, blob_op_complete, &bserr1);
+	spdk_blob_close(snap1, blob_op_complete, &bserr2);
+	spdk_blob_close(clone1, blob_op_complete, &bserr3);
+	spdk_bs_unload(bs, blob_op_complete, &bserr4);
+	spdk_bs_free_io_channel(bs_ch);
+	poll_threads();
+	CU_ASSERT(bserr1 == 0);
+	CU_ASSERT(bserr2 == 0);
+	CU_ASSERT(bserr3 == 0);
+	CU_ASSERT(bserr4 == 0);
+	g_blob = NULL;
+
+	/* Reload the blobstore */
+	spdk_bs_opts_init(&bs_opts, sizeof(bs_opts));
+	bs_opts.esnap_bs_dev_create = ut_esnap_create;
+	dev = init_dev();
+	spdk_bs_load(dev, &bs_opts, bs_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	SPDK_CU_ASSERT_FATAL(g_bs != NULL);
+}
+
 static bool
 blob_esnap_verify_contents(struct spdk_blob *blob, struct spdk_io_channel *ch,
 			   uint64_t offset, uint64_t size, uint32_t readsize, const char *how)
@@ -8494,6 +8629,7 @@ main(int argc, char **argv)
 	CU_ADD_TEST(suite_esnap_bs, blob_esnap_clone_snapshot);
 	CU_ADD_TEST(suite_esnap_bs, blob_esnap_clone_inflate);
 	CU_ADD_TEST(suite_esnap_bs, blob_esnap_clone_decouple);
+	CU_ADD_TEST(suite_esnap_bs, blob_esnap_clone_reload);
 
 	allocate_threads(2);
 	set_thread(0);