vbdev_lvol: degraded open of esnap clones

If an esnap clone is missing its snapshot the lvol should still open in
degraded mode. A degraded lvol will not have a bdev registered and as
such cannot perform any IO.

Change-Id: I736194650dfcf1eb78214c8896c31acc7a946b54
Signed-off-by: Mike Gerdts <mgerdts@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16425
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
Mike Gerdts 2022-10-10 16:31:31 -05:00 committed by David Ko
parent ebb9085755
commit 0fa543f209
3 changed files with 282 additions and 19 deletions

View File

@ -10,6 +10,7 @@
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/uuid.h"
#include "spdk/blob.h"
#include "vbdev_lvol.h"
@ -439,6 +440,10 @@ _vbdev_lvs_remove(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void
return;
}
TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) {
if (lvol->bdev == NULL) {
spdk_lvol_close(lvol, _vbdev_lvs_remove_bdev_unregistered_cb, lvs_bdev);
continue;
}
spdk_bdev_unregister(lvol->bdev, _vbdev_lvs_remove_bdev_unregistered_cb, lvs_bdev);
}
}
@ -1003,6 +1008,12 @@ _create_lvol_disk(struct spdk_lvol *lvol, bool destroy)
unsigned char *alias;
int rc;
if (spdk_blob_is_degraded(lvol->blob)) {
SPDK_NOTICELOG("lvol %s: blob is degraded: deferring bdev creation\n",
lvol->unique_id);
return 0;
}
lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store);
if (lvs_bdev == NULL) {
SPDK_ERRLOG("No spdk lvs-bdev pair found for lvol %s\n", lvol->unique_id);
@ -1686,6 +1697,90 @@ vbdev_lvs_grow(struct spdk_lvol_store *lvs,
}
}
/* Begin degraded blobstore device */
/*
* When an external snapshot is missing, an instance of bs_dev_degraded is used as the blob's
* back_bs_dev. No bdev is registered, so there should be no IO nor requests for channels. The main
* purposes of this device are to prevent blobstore from hitting fatal runtime errors and to
* indicate that the blob is degraded via the is_degraded() callback.
*/
static void
bs_dev_degraded_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
assert(false);
cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO);
}
static void
bs_dev_degraded_readv(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
struct iovec *iov, int iovcnt, uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args)
{
assert(false);
cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO);
}
static void
bs_dev_degraded_readv_ext(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
struct iovec *iov, int iovcnt, uint64_t lba, uint32_t lba_count,
struct spdk_bs_dev_cb_args *cb_args,
struct spdk_blob_ext_io_opts *io_opts)
{
assert(false);
cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO);
}
static bool
bs_dev_degraded_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count)
{
assert(false);
return false;
}
static struct spdk_io_channel *
bs_dev_degraded_create_channel(struct spdk_bs_dev *bs_dev)
{
assert(false);
return NULL;
}
static void
bs_dev_degraded_destroy_channel(struct spdk_bs_dev *bs_dev, struct spdk_io_channel *channel)
{
assert(false);
}
static void
bs_dev_degraded_destroy(struct spdk_bs_dev *bs_dev)
{
}
static bool
bs_dev_degraded_is_degraded(struct spdk_bs_dev *bs_dev)
{
return true;
}
static struct spdk_bs_dev bs_dev_degraded = {
.create_channel = bs_dev_degraded_create_channel,
.destroy_channel = bs_dev_degraded_destroy_channel,
.destroy = bs_dev_degraded_destroy,
.read = bs_dev_degraded_read,
.readv = bs_dev_degraded_readv,
.readv_ext = bs_dev_degraded_readv_ext,
.is_zeroes = bs_dev_degraded_is_zeroes,
.is_degraded = bs_dev_degraded_is_degraded,
/* Make the device as large as possible without risk of uint64 overflow. */
.blockcnt = UINT64_MAX / 512,
/* Prevent divide by zero errors calculating LBAs that will never be read. */
.blocklen = 512,
};
/* End degraded blobstore device */
/* Begin external snapshot support */
static void
@ -1701,6 +1796,7 @@ vbdev_lvol_esnap_dev_create(void *bs_ctx, void *blob_ctx, struct spdk_blob *blob
const void *esnap_id, uint32_t id_len,
struct spdk_bs_dev **_bs_dev)
{
struct spdk_lvol_store *lvs = bs_ctx;
struct spdk_lvol *lvol = blob_ctx;
struct spdk_bs_dev *bs_dev = NULL;
struct spdk_uuid uuid;
@ -1735,16 +1831,37 @@ vbdev_lvol_esnap_dev_create(void *bs_ctx, void *blob_ctx, struct spdk_blob *blob
rc = spdk_bdev_create_bs_dev(uuid_str, false, NULL, 0,
vbdev_lvol_esnap_bdev_event_cb, NULL, &bs_dev);
if (rc != 0) {
SPDK_ERRLOG("lvol %s: failed to create bs_dev from bdev '%s': %d\n",
lvol->unique_id, uuid_str, rc);
return rc;
goto fail;
}
rc = spdk_bs_bdev_claim(bs_dev, &g_lvol_if);
if (rc != 0) {
SPDK_ERRLOG("lvol %s: unable to claim esnap bdev '%s': %d\n",
lvol->unique_id, uuid_str, rc);
SPDK_ERRLOG("lvol %s: unable to claim esnap bdev '%s': %d\n", lvol->unique_id,
uuid_str, rc);
bs_dev->destroy(bs_dev);
return rc;
goto fail;
}
*_bs_dev = bs_dev;
return 0;
fail:
/* Unable to open or claim the bdev. This lvol is degraded. */
bs_dev = &bs_dev_degraded;
SPDK_NOTICELOG("lvol %s: bdev %s not available: lvol is degraded\n", lvol->unique_id,
uuid_str);
/*
* Be sure not to call spdk_lvs_missing_add() on an lvol that is already degraded. This can
* lead to a cycle in the degraded_lvols tailq.
*/
if (lvol->degraded_set == NULL) {
rc = spdk_lvs_esnap_missing_add(lvs, lvol, uuid_str, sizeof(uuid_str));
if (rc != 0) {
SPDK_NOTICELOG("lvol %s: unable to register missing esnap device %s: "
"it will not be hotplugged if added later\n",
lvol->unique_id, uuid_str);
}
}
*_bs_dev = bs_dev;

View File

@ -75,6 +75,130 @@ function test_esnap_reload() {
rpc_cmd bdev_malloc_delete "$esnap_dev"
}
function test_esnap_reload_missing() {
local bs_dev esnap_dev
local block_size=512
local esnap_size_mb=1
local lvs_cluster_size=$((16 * 1024))
local lvs_uuid esnap_uuid eclone_uuid snap_uuid clone_uuid uuid
local aio_bdev=test_esnap_reload_aio0
local lvols
# Create the lvstore on an aio device. Can't use malloc because we need to remove
# the device and re-add it to trigger an lvstore unload and then load.
rm -f $testdir/aio_bdev_0
truncate -s "${AIO_SIZE_MB}M" $testdir/aio_bdev_0
bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
lvs_uuid=$(rpc_cmd bdev_lvol_create_lvstore -c "$lvs_cluster_size" "$bs_dev" lvs_test)
# Create a bdev that will be the external snapshot
# State:
# esnap(present) <-- eclone(not degraded)
esnap_uuid=e4b40d8b-f623-416d-8234-baf5a4c83cbd
esnap_dev=$(rpc_cmd bdev_malloc_create -u "$esnap_uuid" "$esnap_size_mb" "$block_size")
eclone_uuid=$(rpc_cmd bdev_lvol_clone_bdev "$esnap_uuid" lvs_test "eclone")
lvols=$(rpc_cmd bdev_lvol_get_lvols)
[[ "$(jq -r '. | length' <<< "$lvols")" == "1" ]]
[[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "false" ]]
# Unload the lvstore and delete the external snapshot
rpc_cmd bdev_aio_delete "$aio_bdev"
NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test
rpc_cmd bdev_malloc_delete "$esnap_uuid"
# Load the lvstore, eclone bdev should not exist but the lvol should exist.
# State:
# esnap(missing) <-- eclone(degraded)
bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
NOT rpc_cmd bdev_get_bdevs -b lvs_test/eclone
NOT rpc_cmd bdev_get_bdevs -b "$eclone_uuid"
lvols=$(rpc_cmd bdev_lvol_get_lvols)
[[ "$(jq -r '. | length' <<< "$lvols")" == "1" ]]
[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]]
# Reload the lvstore with esnap present during load. This should make the lvol not degraded.
# State:
# esnap(present) <-- eclone(not degraded)
rpc_cmd bdev_aio_delete "$aio_bdev"
NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test
esnap_dev=$(rpc_cmd bdev_malloc_create -u "$esnap_uuid" "$esnap_size_mb" "$block_size")
bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
rpc_cmd bdev_lvol_get_lvstores -l lvs_test
rpc_cmd bdev_get_bdevs -b lvs_test/eclone
rpc_cmd bdev_get_bdevs -b "$eclone_uuid"
lvols=$(rpc_cmd bdev_lvol_get_lvols)
[[ "$(jq -r '. | length' <<< "$lvols")" == "1" ]]
[[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "false" ]]
# Create a clone of eclone, then reload without the esnap present.
# State:
# esnap(missing) <-- eclone(degraded) <-- clone(degraded)
rpc_cmd bdev_lvol_set_read_only "$eclone_uuid"
clone_uuid=$(rpc_cmd bdev_lvol_clone "$eclone_uuid" clone)
rpc_cmd bdev_get_bdevs -b lvs_test/clone
rpc_cmd bdev_get_bdevs -b "$clone_uuid"
rpc_cmd bdev_aio_delete "$aio_bdev"
NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test
rpc_cmd bdev_malloc_delete "$esnap_uuid"
bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
lvols=$(rpc_cmd bdev_lvol_get_lvols)
[[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "clone").is_clone' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]]
NOT rpc_cmd bdev_get_bdevs -b lvs_test/eclone
NOT rpc_cmd bdev_get_bdevs -b "$eclone_uuid"
NOT rpc_cmd bdev_get_bdevs -b lvs_test/clone
NOT rpc_cmd bdev_get_bdevs -b "$clone_uuid"
# Reload the lvstore with esnap present during load. This should make the lvols not
# degraded.
# State:
# esnap(present) <-- eclone(not degraded) <-- clone(not degraded)
rpc_cmd bdev_aio_delete "$aio_bdev"
NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test
esnap_dev=$(rpc_cmd bdev_malloc_create -u "$esnap_uuid" "$esnap_size_mb" "$block_size")
bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
lvols=$(rpc_cmd bdev_lvol_get_lvols)
[[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "false" ]]
[[ "$(jq -r '.[] | select(.name == "clone").is_clone' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "clone").is_degraded' <<< "$lvols")" == "false" ]]
rpc_cmd bdev_get_bdevs -b lvs_test/eclone
rpc_cmd bdev_get_bdevs -b "$eclone_uuid"
rpc_cmd bdev_get_bdevs -b lvs_test/clone
rpc_cmd bdev_get_bdevs -b "$clone_uuid"
# Create a snapshot of clone, then reload without the esnap present.
# State:
# esnap(missing) <-- eclone(degraded) <-- snap(degraded) <-- clone(degraded)
snap_uuid=$(rpc_cmd bdev_lvol_snapshot "$clone_uuid" snap)
rpc_cmd bdev_get_bdevs -b lvs_test/snap
rpc_cmd bdev_get_bdevs -b "$snap_uuid"
rpc_cmd bdev_aio_delete "$aio_bdev"
NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test
rpc_cmd bdev_malloc_delete "$esnap_uuid"
bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
lvols=$(rpc_cmd bdev_lvol_get_lvols)
[[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "clone").is_clone' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "clone").is_degraded' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "snap").is_clone' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "snap").is_snapshot' <<< "$lvols")" == "true" ]]
[[ "$(jq -r '.[] | select(.name == "snap").is_degraded' <<< "$lvols")" == "true" ]]
NOT rpc_cmd bdev_get_bdevs -b lvs_test/eclone
NOT rpc_cmd bdev_get_bdevs -b "$eclone_uuid"
NOT rpc_cmd bdev_get_bdevs -b lvs_test/clone
NOT rpc_cmd bdev_get_bdevs -b "$clone_uuid"
NOT rpc_cmd bdev_get_bdevs -b lvs_test/snap
NOT rpc_cmd bdev_get_bdevs -b "$snap_uuid"
rpc_cmd bdev_aio_delete "$aio_bdev"
}
function log_jq_out() {
local key
@ -225,6 +349,7 @@ waitforlisten $spdk_pid
modprobe nbd
run_test "test_esnap_reload" test_esnap_reload
run_test "test_esnap_reload" test_esnap_reload_missing
run_test "test_esnap_clones" test_esnap_clones
trap - SIGINT SIGTERM SIGPIPE EXIT

View File

@ -7,6 +7,7 @@
#include "spdk_cunit.h"
#include "spdk/string.h"
#include "common/lib/ut_multithread.c"
#include "bdev/lvol/vbdev_lvol.c"
#include "unit/lib/json_mock.c"
@ -44,6 +45,10 @@ DEFINE_STUB(spdk_blob_get_esnap_id, int,
DEFINE_STUB(spdk_blob_is_esnap_clone, bool, (const struct spdk_blob *blob), false);
DEFINE_STUB(spdk_lvol_iter_immediate_clones, int,
(struct spdk_lvol *lvol, spdk_lvol_iter_cb cb_fn, void *cb_arg), -ENOTSUP);
DEFINE_STUB(spdk_lvs_esnap_missing_add, int,
(struct spdk_lvol_store *lvs, struct spdk_lvol *lvol, const void *esnap_id,
uint32_t id_len), -ENOTSUP);
DEFINE_STUB(spdk_blob_is_degraded, bool, (const struct spdk_blob *blob), false);
struct spdk_blob {
uint64_t id;
@ -1797,27 +1802,37 @@ ut_esnap_dev_create(void)
/* Bdev not found */
g_base_bdev = NULL;
rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev);
CU_ASSERT(rc == -ENODEV);
CU_ASSERT(bs_dev == NULL);
/* Cannot get a claim */
g_base_bdev = &bdev;
lvol_already_opened = true;
rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev);
CU_ASSERT(rc == -EPERM);
CU_ASSERT(bs_dev == NULL);
/* Happy path */
lvol_already_opened = false;
MOCK_SET(spdk_blob_is_degraded, true);
rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev);
CU_ASSERT(rc == 0);
SPDK_CU_ASSERT_FATAL(bs_dev != NULL);
CU_ASSERT(bs_dev->destroy == bs_dev_degraded_destroy);
bs_dev->destroy(bs_dev);
/* Cannot get a claim */
/* TODO: This suggests we need a way to wait for a claim to be available. */
g_base_bdev = &bdev;
lvol_already_opened = true;
MOCK_SET(spdk_blob_is_degraded, true);
rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev);
CU_ASSERT(rc == 0);
SPDK_CU_ASSERT_FATAL(bs_dev != NULL);
CU_ASSERT(bs_dev->destroy == bs_dev_degraded_destroy);
bs_dev->destroy(bs_dev);
/* Happy path */
lvol_already_opened = false;
MOCK_SET(spdk_blob_is_degraded, false);
rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev);
CU_ASSERT(rc == 0);
SPDK_CU_ASSERT_FATAL(bs_dev != NULL);
CU_ASSERT(bs_dev->destroy == ut_bs_dev_destroy);
bs_dev->destroy(bs_dev);
g_base_bdev = NULL;
lvol_already_opened = false;
free(unterminated);
MOCK_CLEAR(spdk_blob_is_degraded);
}
static void
@ -1910,9 +1925,15 @@ main(int argc, char **argv)
CU_ADD_TEST(suite, ut_esnap_dev_create);
CU_ADD_TEST(suite, ut_lvol_esnap_clone_bad_args);
allocate_threads(1);
set_thread(0);
CU_basic_set_mode(CU_BRM_VERBOSE);
CU_basic_run_tests();
num_failures = CU_get_number_of_failures();
CU_cleanup_registry();
free_threads();
return num_failures;
}