blob: defer unload until channel destroy done

As the blobstore is being unloaded, async esnap channel destructions may
be in flight. In such a case, spdk_bs_unload() needs to defer the unload
of the blobstore until the channel destructions are complete.

The following commands lead to the illustrated states.

  bdev_malloc_create -b malloc0
  bdev_lvol_clone_bdev lvs1 malloc0 eclone

     .---------.   .--------.
     | malloc0 |<--| eclone |
     `---------'   `--------'

  bdev_lvol_snapshot lvs1/eclone snap

     .---------.   .------.   .--------.
     | malloc0 |<--| snap |<--| eclone |
     `---------'   `------'   `--------'

  bdev_lvol_clone lvs1/snap clone

                                .--------.
                              ,-| eclone |
     .---------.   .------.<-'  `--------'
     | malloc0 |<--| snap |
     `---------'   `------'<-.  .-------.
                              `-| clone |
                                `-------'

As the blobstore is preparing to be unloaded, spdk_blob_close() is
called once for eclone, once for clone, and once for snap. The last of
these calls happens just before spdk_bs_unload() is called.
spdk_blob_close() needs to destroy the esnap channels on each thread.
While this thread iteration is in progress, spdk_bs_unload() starts.
The work performed in the iteration maintains a reference to the blob,
and as such spdk_bs_unload() cannot do its work until the iteration is
complete.
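
In outline, the fix counts in-flight esnap channel destructions and,
when spdk_bs_unload() is called while the count is nonzero, stashes the
completion callback so that the last destruction restarts the unload.
The sketch below is a condensed, compilable illustration of just that
pattern, not the real SPDK code: bs, bs_unload(), and
esnap_destroy_channels_done() are simplified stand-ins for
spdk_blob_store, spdk_bs_unload(), and
blob_esnap_destroy_channels_done(), and the actual unload work and
per-thread channel iteration are elided.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

typedef void (*bs_op_complete)(void *cb_arg, int bserrno);

struct bs {
	uint32_t	esnap_channels_unloading;	/* in-flight channel destructions */
	bs_op_complete	esnap_unload_cb_fn;		/* stashed deferred-unload callback */
	void		*esnap_unload_cb_arg;
};

static void bs_unload(struct bs *bs, bs_op_complete cb_fn, void *cb_arg);

/* Runs when the channels for one esnap clone have been destroyed on every thread. */
static void
esnap_destroy_channels_done(struct bs *bs)
{
	bs->esnap_channels_unloading--;
	if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
		/* Restart the unload that bs_unload() deferred. */
		bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
	}
}

static void
bs_unload(struct bs *bs, bs_op_complete cb_fn, void *cb_arg)
{
	if (bs->esnap_channels_unloading != 0) {
		if (bs->esnap_unload_cb_fn != NULL) {
			cb_fn(cb_arg, -EBUSY);	/* an unload is already deferred */
			return;
		}
		/* Defer: the last esnap_destroy_channels_done() re-enters here. */
		bs->esnap_unload_cb_fn = cb_fn;
		bs->esnap_unload_cb_arg = cb_arg;
		return;
	}
	if (bs->esnap_unload_cb_fn != NULL) {
		/* The deferred unload is now proceeding; drop the stashed callback. */
		bs->esnap_unload_cb_fn = NULL;
		bs->esnap_unload_cb_arg = NULL;
	}
	/* ... the normal unload path would run here ... */
	cb_fn(cb_arg, 0);
}

static void
done(void *cb_arg, int bserrno)
{
	(void)cb_arg;
	printf("unload completed: %d\n", bserrno);
}

int
main(void)
{
	struct bs bs = { .esnap_channels_unloading = 1 };

	bs_unload(&bs, done, NULL);		/* deferred: one destruction in flight */
	esnap_destroy_channels_done(&bs);	/* last destruction restarts the unload */
	return 0;
}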

Change-Id: Id9b92ad73341fb3437441146110055c84ee6dc52
Signed-off-by: Mike Gerdts <mgerdts@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14975
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>

diff --git a/lib/blob/blob.c b/lib/blob/blob.c

@@ -5586,6 +5586,30 @@ spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_a
 	SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
 
+	/*
+	 * If external snapshot channels are being destroyed while the blobstore is unloaded, the
+	 * unload is deferred until after the channel destruction completes.
+	 */
+	if (bs->esnap_channels_unloading != 0) {
+		if (bs->esnap_unload_cb_fn != NULL) {
+			SPDK_ERRLOG("Blobstore unload in progress\n");
+			cb_fn(cb_arg, -EBUSY);
+			return;
+		}
+		SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
+			      " esnap clones are unloading\n", bs->esnap_channels_unloading);
+		bs->esnap_unload_cb_fn = cb_fn;
+		bs->esnap_unload_cb_arg = cb_arg;
+		return;
+	}
+	if (bs->esnap_unload_cb_fn != NULL) {
+		SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
+		assert(bs->esnap_unload_cb_fn == cb_fn);
+		assert(bs->esnap_unload_cb_arg == cb_arg);
+		bs->esnap_unload_cb_fn = NULL;
+		bs->esnap_unload_cb_arg = NULL;
+	}
 
 	if (!RB_EMPTY(&bs->open_blobs)) {
 		SPDK_ERRLOG("Blobstore still has open blobs\n");
 		cb_fn(cb_arg, -EBUSY);
@@ -7977,7 +8001,8 @@ blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
 		return;
 	}
 
-	SPDK_DEBUGLOG(blob_esnap, "blob %" PRIx64 ": closed, syncing metadata\n", blob->id);
+	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
+		      blob->id, spdk_thread_get_name(spdk_get_thread()));
 
 	/* Sync metadata */
 	blob_persist(seq, blob, blob_close_cpl, blob);
@@ -8842,6 +8867,7 @@ blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
 {
 	struct blob_esnap_destroy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
 	struct spdk_blob *blob = ctx->blob;
+	struct spdk_blob_store *bs = blob->bs;
 
 	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
 		      blob->id);
@@ -8850,6 +8876,11 @@ blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
 		ctx->cb_fn(ctx->cb_arg, blob, status);
 	}
 	free(ctx);
+
+	bs->esnap_channels_unloading--;
+	if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
+		spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
+	}
 }
 
 static void
@@ -8910,6 +8941,7 @@ blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, spdk_blob_op_with_han
 	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
 		      blob->id);
 
+	blob->bs->esnap_channels_unloading++;
 	spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
 			      blob_esnap_destroy_channels_done);
 }

diff --git a/lib/blob/blobstore.h b/lib/blob/blobstore.h

@@ -189,6 +189,14 @@ struct spdk_blob_store {
 	spdk_bs_esnap_dev_create esnap_bs_dev_create;
 	void *esnap_ctx;
 
+	/* If external snapshot channels are being destroyed while
+	 * the blobstore is unloaded, the unload is deferred until
+	 * after the channel destruction completes.
+	 */
+	uint32_t esnap_channels_unloading;
+	spdk_bs_op_complete esnap_unload_cb_fn;
+	void *esnap_unload_cb_arg;
 };
 
 struct spdk_bs_channel {

diff --git a/test/unit/lib/blob/blob.c/blob_ut.c b/test/unit/lib/blob/blob.c/blob_ut.c

@@ -167,6 +167,11 @@ bs_op_with_handle_complete(void *cb_arg, struct spdk_blob_store *bs,
 static void
 blob_op_complete(void *cb_arg, int bserrno)
 {
+	if (cb_arg != NULL) {
+		int *errp = cb_arg;
+
+		*errp = bserrno;
+	}
 	g_bserrno = bserrno;
 }
@@ -7577,6 +7582,136 @@ blob_esnap_create(void)
 	g_blob = NULL;
 }
 
+static void
+blob_esnap_clone_reload(void)
+{
+	struct spdk_blob_store *bs = g_bs;
+	struct spdk_bs_opts bs_opts;
+	struct ut_esnap_opts esnap_opts;
+	struct spdk_blob_opts opts;
+	struct spdk_blob *eclone1, *snap1, *clone1;
+	uint32_t cluster_sz = spdk_bs_get_cluster_size(bs);
+	uint32_t block_sz = spdk_bs_get_io_unit_size(bs);
+	const uint32_t esnap_num_clusters = 4;
+	uint64_t esnap_num_blocks = cluster_sz * esnap_num_clusters / block_sz;
+	spdk_blob_id eclone1_id, snap1_id, clone1_id;
+	struct spdk_io_channel *bs_ch;
+	char buf[block_sz];
+	int bserr1, bserr2, bserr3, bserr4;
+	struct spdk_bs_dev *dev;
+
+	/* Create and open an esnap clone blob */
+	ut_spdk_blob_opts_init(&opts);
+	ut_esnap_opts_init(block_sz, esnap_num_blocks, __func__, NULL, &esnap_opts);
+	opts.esnap_id = &esnap_opts;
+	opts.esnap_id_len = sizeof(esnap_opts);
+	opts.num_clusters = esnap_num_clusters;
+	eclone1 = ut_blob_create_and_open(bs, &opts);
+	CU_ASSERT(eclone1 != NULL);
+	CU_ASSERT(spdk_blob_is_esnap_clone(eclone1));
+	eclone1_id = eclone1->id;
+
+	/* Create and open a snapshot of eclone1 */
+	spdk_bs_create_snapshot(bs, eclone1_id, NULL, blob_op_with_id_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
+	CU_ASSERT(g_bserrno == 0);
+	snap1_id = g_blobid;
+	spdk_bs_open_blob(bs, snap1_id, blob_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	CU_ASSERT(g_blob != NULL);
+	snap1 = g_blob;
+
+	/* Create and open regular clone of snap1 */
+	spdk_bs_create_clone(bs, snap1_id, NULL, blob_op_with_id_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
+	SPDK_CU_ASSERT_FATAL(g_bserrno == 0);
+	clone1_id = g_blobid;
+	spdk_bs_open_blob(bs, clone1_id, blob_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	CU_ASSERT(g_blob != NULL);
+	clone1 = g_blob;
+
+	/* Close the blobs in preparation for reloading the blobstore */
+	spdk_blob_close(clone1, blob_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	spdk_blob_close(snap1, blob_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	spdk_blob_close(eclone1, blob_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	g_blob = NULL;
+
+	/* Reload the blobstore */
+	spdk_bs_opts_init(&bs_opts, sizeof(bs_opts));
+	bs_opts.esnap_bs_dev_create = ut_esnap_create;
+	ut_bs_reload(&bs, &bs_opts);
+
+	/* Be sure each of the blobs can be opened */
+	spdk_bs_open_blob(bs, eclone1_id, blob_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	CU_ASSERT(g_blob != NULL);
+	eclone1 = g_blob;
+	spdk_bs_open_blob(bs, snap1_id, blob_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	CU_ASSERT(g_blob != NULL);
+	snap1 = g_blob;
+	spdk_bs_open_blob(bs, clone1_id, blob_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	CU_ASSERT(g_blob != NULL);
+	clone1 = g_blob;
+
+	/* Perform some reads on each of them to cause channels to be allocated */
+	bs_ch = spdk_bs_alloc_io_channel(bs);
+	CU_ASSERT(bs_ch != NULL);
+	spdk_blob_io_read(eclone1, bs_ch, buf, 0, 1, bs_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	spdk_blob_io_read(snap1, bs_ch, buf, 0, 1, bs_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	spdk_blob_io_read(clone1, bs_ch, buf, 0, 1, bs_op_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+
+	/*
+	 * Unload the blobstore in a way similar to how lvstore unloads it. This should exercise
+	 * the deferred unload path in spdk_bs_unload().
+	 */
+	bserr1 = 0xbad;
+	bserr2 = 0xbad;
+	bserr3 = 0xbad;
+	bserr4 = 0xbad;
+	spdk_blob_close(eclone1, blob_op_complete, &bserr1);
+	spdk_blob_close(snap1, blob_op_complete, &bserr2);
+	spdk_blob_close(clone1, blob_op_complete, &bserr3);
+	spdk_bs_unload(bs, blob_op_complete, &bserr4);
+	spdk_bs_free_io_channel(bs_ch);
+	poll_threads();
+	CU_ASSERT(bserr1 == 0);
+	CU_ASSERT(bserr2 == 0);
+	CU_ASSERT(bserr3 == 0);
+	CU_ASSERT(bserr4 == 0);
+	g_blob = NULL;
+
+	/* Reload the blobstore */
+	spdk_bs_opts_init(&bs_opts, sizeof(bs_opts));
+	bs_opts.esnap_bs_dev_create = ut_esnap_create;
+	dev = init_dev();
+	spdk_bs_load(dev, &bs_opts, bs_op_with_handle_complete, NULL);
+	poll_threads();
+	CU_ASSERT(g_bserrno == 0);
+	SPDK_CU_ASSERT_FATAL(g_bs != NULL);
+}
+
 static bool
 blob_esnap_verify_contents(struct spdk_blob *blob, struct spdk_io_channel *ch,
 			   uint64_t offset, uint64_t size, uint32_t readsize, const char *how)
@@ -8494,6 +8629,7 @@ main(int argc, char **argv)
 	CU_ADD_TEST(suite_esnap_bs, blob_esnap_clone_snapshot);
 	CU_ADD_TEST(suite_esnap_bs, blob_esnap_clone_inflate);
 	CU_ADD_TEST(suite_esnap_bs, blob_esnap_clone_decouple);
+	CU_ADD_TEST(suite_esnap_bs, blob_esnap_clone_reload);
 
 	allocate_threads(2);
 	set_thread(0);