From 0fa543f2092c055961f359d8d9e2438e8e86bf7a Mon Sep 17 00:00:00 2001 From: Mike Gerdts Date: Mon, 10 Oct 2022 16:31:31 -0500 Subject: [PATCH] vbdev_lvol: degraded open of esnap clones If an esnap clone is missing its snapshot the lvol should still open in degraded mode. A degraded lvol will not have a bdev registered and as such cannot perform any IO. Change-Id: I736194650dfcf1eb78214c8896c31acc7a946b54 Signed-off-by: Mike Gerdts Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16425 Reviewed-by: Jim Harris Reviewed-by: Ben Walker Tested-by: SPDK CI Jenkins --- module/bdev/lvol/vbdev_lvol.c | 129 +++++++++++++++++- test/lvol/external_snapshot.sh | 125 +++++++++++++++++ .../lib/bdev/vbdev_lvol.c/vbdev_lvol_ut.c | 47 +++++-- 3 files changed, 282 insertions(+), 19 deletions(-) diff --git a/module/bdev/lvol/vbdev_lvol.c b/module/bdev/lvol/vbdev_lvol.c index 1439e6c0b..329bb3d68 100644 --- a/module/bdev/lvol/vbdev_lvol.c +++ b/module/bdev/lvol/vbdev_lvol.c @@ -10,6 +10,7 @@ #include "spdk/log.h" #include "spdk/string.h" #include "spdk/uuid.h" +#include "spdk/blob.h" #include "vbdev_lvol.h" @@ -439,6 +440,10 @@ _vbdev_lvs_remove(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void return; } TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) { + if (lvol->bdev == NULL) { + spdk_lvol_close(lvol, _vbdev_lvs_remove_bdev_unregistered_cb, lvs_bdev); + continue; + } spdk_bdev_unregister(lvol->bdev, _vbdev_lvs_remove_bdev_unregistered_cb, lvs_bdev); } } @@ -1003,6 +1008,12 @@ _create_lvol_disk(struct spdk_lvol *lvol, bool destroy) unsigned char *alias; int rc; + if (spdk_blob_is_degraded(lvol->blob)) { + SPDK_NOTICELOG("lvol %s: blob is degraded: deferring bdev creation\n", + lvol->unique_id); + return 0; + } + lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store); if (lvs_bdev == NULL) { SPDK_ERRLOG("No spdk lvs-bdev pair found for lvol %s\n", lvol->unique_id); @@ -1686,6 +1697,90 @@ vbdev_lvs_grow(struct spdk_lvol_store *lvs, } } +/* Begin degraded blobstore device */ + +/* + * When an external snapshot is missing, an instance of bs_dev_degraded is used as the blob's + * back_bs_dev. No bdev is registered, so there should be no IO nor requests for channels. The main + * purposes of this device are to prevent blobstore from hitting fatal runtime errors and to + * indicate that the blob is degraded via the is_degraded() callback. + */ + +static void +bs_dev_degraded_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload, + uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args) +{ + assert(false); + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO); +} + +static void +bs_dev_degraded_readv(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args) +{ + assert(false); + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO); +} + +static void +bs_dev_degraded_readv_ext(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, + struct iovec *iov, int iovcnt, uint64_t lba, uint32_t lba_count, + struct spdk_bs_dev_cb_args *cb_args, + struct spdk_blob_ext_io_opts *io_opts) +{ + assert(false); + cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO); +} + +static bool +bs_dev_degraded_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count) +{ + assert(false); + return false; +} + +static struct spdk_io_channel * +bs_dev_degraded_create_channel(struct spdk_bs_dev *bs_dev) +{ + assert(false); + return NULL; +} + +static void +bs_dev_degraded_destroy_channel(struct spdk_bs_dev *bs_dev, struct spdk_io_channel *channel) +{ + assert(false); +} + +static void +bs_dev_degraded_destroy(struct spdk_bs_dev *bs_dev) +{ +} + +static bool +bs_dev_degraded_is_degraded(struct spdk_bs_dev *bs_dev) +{ + return true; +} + +static struct spdk_bs_dev bs_dev_degraded = { + .create_channel = bs_dev_degraded_create_channel, + .destroy_channel = bs_dev_degraded_destroy_channel, + .destroy = bs_dev_degraded_destroy, + .read = bs_dev_degraded_read, + .readv = bs_dev_degraded_readv, + .readv_ext = bs_dev_degraded_readv_ext, + .is_zeroes = bs_dev_degraded_is_zeroes, + .is_degraded = bs_dev_degraded_is_degraded, + /* Make the device as large as possible without risk of uint64 overflow. */ + .blockcnt = UINT64_MAX / 512, + /* Prevent divide by zero errors calculating LBAs that will never be read. */ + .blocklen = 512, +}; + +/* End degraded blobstore device */ + /* Begin external snapshot support */ static void @@ -1701,6 +1796,7 @@ vbdev_lvol_esnap_dev_create(void *bs_ctx, void *blob_ctx, struct spdk_blob *blob const void *esnap_id, uint32_t id_len, struct spdk_bs_dev **_bs_dev) { + struct spdk_lvol_store *lvs = bs_ctx; struct spdk_lvol *lvol = blob_ctx; struct spdk_bs_dev *bs_dev = NULL; struct spdk_uuid uuid; @@ -1735,16 +1831,37 @@ vbdev_lvol_esnap_dev_create(void *bs_ctx, void *blob_ctx, struct spdk_blob *blob rc = spdk_bdev_create_bs_dev(uuid_str, false, NULL, 0, vbdev_lvol_esnap_bdev_event_cb, NULL, &bs_dev); if (rc != 0) { - SPDK_ERRLOG("lvol %s: failed to create bs_dev from bdev '%s': %d\n", - lvol->unique_id, uuid_str, rc); - return rc; + goto fail; } + rc = spdk_bs_bdev_claim(bs_dev, &g_lvol_if); if (rc != 0) { - SPDK_ERRLOG("lvol %s: unable to claim esnap bdev '%s': %d\n", - lvol->unique_id, uuid_str, rc); + SPDK_ERRLOG("lvol %s: unable to claim esnap bdev '%s': %d\n", lvol->unique_id, + uuid_str, rc); bs_dev->destroy(bs_dev); - return rc; + goto fail; + } + + *_bs_dev = bs_dev; + return 0; + +fail: + /* Unable to open or claim the bdev. This lvol is degraded. */ + bs_dev = &bs_dev_degraded; + SPDK_NOTICELOG("lvol %s: bdev %s not available: lvol is degraded\n", lvol->unique_id, + uuid_str); + + /* + * Be sure not to call spdk_lvs_missing_add() on an lvol that is already degraded. This can + * lead to a cycle in the degraded_lvols tailq. + */ + if (lvol->degraded_set == NULL) { + rc = spdk_lvs_esnap_missing_add(lvs, lvol, uuid_str, sizeof(uuid_str)); + if (rc != 0) { + SPDK_NOTICELOG("lvol %s: unable to register missing esnap device %s: " + "it will not be hotplugged if added later\n", + lvol->unique_id, uuid_str); + } } *_bs_dev = bs_dev; diff --git a/test/lvol/external_snapshot.sh b/test/lvol/external_snapshot.sh index 7d10ac167..b6f0f5bd2 100755 --- a/test/lvol/external_snapshot.sh +++ b/test/lvol/external_snapshot.sh @@ -75,6 +75,130 @@ function test_esnap_reload() { rpc_cmd bdev_malloc_delete "$esnap_dev" } +function test_esnap_reload_missing() { + local bs_dev esnap_dev + local block_size=512 + local esnap_size_mb=1 + local lvs_cluster_size=$((16 * 1024)) + local lvs_uuid esnap_uuid eclone_uuid snap_uuid clone_uuid uuid + local aio_bdev=test_esnap_reload_aio0 + local lvols + + # Create the lvstore on an aio device. Can't use malloc because we need to remove + # the device and re-add it to trigger an lvstore unload and then load. + rm -f $testdir/aio_bdev_0 + truncate -s "${AIO_SIZE_MB}M" $testdir/aio_bdev_0 + bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size") + lvs_uuid=$(rpc_cmd bdev_lvol_create_lvstore -c "$lvs_cluster_size" "$bs_dev" lvs_test) + + # Create a bdev that will be the external snapshot + # State: + # esnap(present) <-- eclone(not degraded) + esnap_uuid=e4b40d8b-f623-416d-8234-baf5a4c83cbd + esnap_dev=$(rpc_cmd bdev_malloc_create -u "$esnap_uuid" "$esnap_size_mb" "$block_size") + eclone_uuid=$(rpc_cmd bdev_lvol_clone_bdev "$esnap_uuid" lvs_test "eclone") + lvols=$(rpc_cmd bdev_lvol_get_lvols) + [[ "$(jq -r '. | length' <<< "$lvols")" == "1" ]] + [[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "false" ]] + + # Unload the lvstore and delete the external snapshot + rpc_cmd bdev_aio_delete "$aio_bdev" + NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test + rpc_cmd bdev_malloc_delete "$esnap_uuid" + + # Load the lvstore, eclone bdev should not exist but the lvol should exist. + # State: + # esnap(missing) <-- eclone(degraded) + bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size") + NOT rpc_cmd bdev_get_bdevs -b lvs_test/eclone + NOT rpc_cmd bdev_get_bdevs -b "$eclone_uuid" + lvols=$(rpc_cmd bdev_lvol_get_lvols) + [[ "$(jq -r '. | length' <<< "$lvols")" == "1" ]] + [[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]] + + # Reload the lvstore with esnap present during load. This should make the lvol not degraded. + # State: + # esnap(present) <-- eclone(not degraded) + rpc_cmd bdev_aio_delete "$aio_bdev" + NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test + esnap_dev=$(rpc_cmd bdev_malloc_create -u "$esnap_uuid" "$esnap_size_mb" "$block_size") + bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size") + rpc_cmd bdev_lvol_get_lvstores -l lvs_test + rpc_cmd bdev_get_bdevs -b lvs_test/eclone + rpc_cmd bdev_get_bdevs -b "$eclone_uuid" + lvols=$(rpc_cmd bdev_lvol_get_lvols) + [[ "$(jq -r '. | length' <<< "$lvols")" == "1" ]] + [[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "false" ]] + + # Create a clone of eclone, then reload without the esnap present. + # State: + # esnap(missing) <-- eclone(degraded) <-- clone(degraded) + rpc_cmd bdev_lvol_set_read_only "$eclone_uuid" + clone_uuid=$(rpc_cmd bdev_lvol_clone "$eclone_uuid" clone) + rpc_cmd bdev_get_bdevs -b lvs_test/clone + rpc_cmd bdev_get_bdevs -b "$clone_uuid" + rpc_cmd bdev_aio_delete "$aio_bdev" + NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test + rpc_cmd bdev_malloc_delete "$esnap_uuid" + bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size") + lvols=$(rpc_cmd bdev_lvol_get_lvols) + [[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "clone").is_clone' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]] + NOT rpc_cmd bdev_get_bdevs -b lvs_test/eclone + NOT rpc_cmd bdev_get_bdevs -b "$eclone_uuid" + NOT rpc_cmd bdev_get_bdevs -b lvs_test/clone + NOT rpc_cmd bdev_get_bdevs -b "$clone_uuid" + + # Reload the lvstore with esnap present during load. This should make the lvols not + # degraded. + # State: + # esnap(present) <-- eclone(not degraded) <-- clone(not degraded) + rpc_cmd bdev_aio_delete "$aio_bdev" + NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test + esnap_dev=$(rpc_cmd bdev_malloc_create -u "$esnap_uuid" "$esnap_size_mb" "$block_size") + bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size") + lvols=$(rpc_cmd bdev_lvol_get_lvols) + [[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "false" ]] + [[ "$(jq -r '.[] | select(.name == "clone").is_clone' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "clone").is_degraded' <<< "$lvols")" == "false" ]] + rpc_cmd bdev_get_bdevs -b lvs_test/eclone + rpc_cmd bdev_get_bdevs -b "$eclone_uuid" + rpc_cmd bdev_get_bdevs -b lvs_test/clone + rpc_cmd bdev_get_bdevs -b "$clone_uuid" + + # Create a snapshot of clone, then reload without the esnap present. + # State: + # esnap(missing) <-- eclone(degraded) <-- snap(degraded) <-- clone(degraded) + snap_uuid=$(rpc_cmd bdev_lvol_snapshot "$clone_uuid" snap) + rpc_cmd bdev_get_bdevs -b lvs_test/snap + rpc_cmd bdev_get_bdevs -b "$snap_uuid" + rpc_cmd bdev_aio_delete "$aio_bdev" + NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test + rpc_cmd bdev_malloc_delete "$esnap_uuid" + bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size") + lvols=$(rpc_cmd bdev_lvol_get_lvols) + [[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "clone").is_clone' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "clone").is_degraded' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "snap").is_clone' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "snap").is_snapshot' <<< "$lvols")" == "true" ]] + [[ "$(jq -r '.[] | select(.name == "snap").is_degraded' <<< "$lvols")" == "true" ]] + NOT rpc_cmd bdev_get_bdevs -b lvs_test/eclone + NOT rpc_cmd bdev_get_bdevs -b "$eclone_uuid" + NOT rpc_cmd bdev_get_bdevs -b lvs_test/clone + NOT rpc_cmd bdev_get_bdevs -b "$clone_uuid" + NOT rpc_cmd bdev_get_bdevs -b lvs_test/snap + NOT rpc_cmd bdev_get_bdevs -b "$snap_uuid" + + rpc_cmd bdev_aio_delete "$aio_bdev" +} + function log_jq_out() { local key @@ -225,6 +349,7 @@ waitforlisten $spdk_pid modprobe nbd run_test "test_esnap_reload" test_esnap_reload +run_test "test_esnap_reload" test_esnap_reload_missing run_test "test_esnap_clones" test_esnap_clones trap - SIGINT SIGTERM SIGPIPE EXIT diff --git a/test/unit/lib/bdev/vbdev_lvol.c/vbdev_lvol_ut.c b/test/unit/lib/bdev/vbdev_lvol.c/vbdev_lvol_ut.c index 8752fd86b..fe529342e 100644 --- a/test/unit/lib/bdev/vbdev_lvol.c/vbdev_lvol_ut.c +++ b/test/unit/lib/bdev/vbdev_lvol.c/vbdev_lvol_ut.c @@ -7,6 +7,7 @@ #include "spdk_cunit.h" #include "spdk/string.h" +#include "common/lib/ut_multithread.c" #include "bdev/lvol/vbdev_lvol.c" #include "unit/lib/json_mock.c" @@ -44,6 +45,10 @@ DEFINE_STUB(spdk_blob_get_esnap_id, int, DEFINE_STUB(spdk_blob_is_esnap_clone, bool, (const struct spdk_blob *blob), false); DEFINE_STUB(spdk_lvol_iter_immediate_clones, int, (struct spdk_lvol *lvol, spdk_lvol_iter_cb cb_fn, void *cb_arg), -ENOTSUP); +DEFINE_STUB(spdk_lvs_esnap_missing_add, int, + (struct spdk_lvol_store *lvs, struct spdk_lvol *lvol, const void *esnap_id, + uint32_t id_len), -ENOTSUP); +DEFINE_STUB(spdk_blob_is_degraded, bool, (const struct spdk_blob *blob), false); struct spdk_blob { uint64_t id; @@ -1797,27 +1802,37 @@ ut_esnap_dev_create(void) /* Bdev not found */ g_base_bdev = NULL; - rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev); - CU_ASSERT(rc == -ENODEV); - CU_ASSERT(bs_dev == NULL); - - /* Cannot get a claim */ - g_base_bdev = &bdev; - lvol_already_opened = true; - rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev); - CU_ASSERT(rc == -EPERM); - CU_ASSERT(bs_dev == NULL); - - /* Happy path */ - lvol_already_opened = false; + MOCK_SET(spdk_blob_is_degraded, true); rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev); CU_ASSERT(rc == 0); SPDK_CU_ASSERT_FATAL(bs_dev != NULL); + CU_ASSERT(bs_dev->destroy == bs_dev_degraded_destroy); + bs_dev->destroy(bs_dev); + + /* Cannot get a claim */ + /* TODO: This suggests we need a way to wait for a claim to be available. */ + g_base_bdev = &bdev; + lvol_already_opened = true; + MOCK_SET(spdk_blob_is_degraded, true); + rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev); + CU_ASSERT(rc == 0); + SPDK_CU_ASSERT_FATAL(bs_dev != NULL); + CU_ASSERT(bs_dev->destroy == bs_dev_degraded_destroy); + bs_dev->destroy(bs_dev); + + /* Happy path */ + lvol_already_opened = false; + MOCK_SET(spdk_blob_is_degraded, false); + rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev); + CU_ASSERT(rc == 0); + SPDK_CU_ASSERT_FATAL(bs_dev != NULL); + CU_ASSERT(bs_dev->destroy == ut_bs_dev_destroy); bs_dev->destroy(bs_dev); g_base_bdev = NULL; lvol_already_opened = false; free(unterminated); + MOCK_CLEAR(spdk_blob_is_degraded); } static void @@ -1910,9 +1925,15 @@ main(int argc, char **argv) CU_ADD_TEST(suite, ut_esnap_dev_create); CU_ADD_TEST(suite, ut_lvol_esnap_clone_bad_args); + allocate_threads(1); + set_thread(0); + CU_basic_set_mode(CU_BRM_VERBOSE); CU_basic_run_tests(); num_failures = CU_get_number_of_failures(); CU_cleanup_registry(); + + free_threads(); + return num_failures; }