lvol: esnap clones must end on cluster boundary

When regular lvols are created, their size is rounded up to the next
cluster boundary. This is not acceptable for esnap clones as this means
that the clone may be silently grown larger than external snapshot. This
can cause a variety of problems for the consumer of an esnap clone lvol.

While the better long-term solution is to allow lvol sizes to fall on
any block boundary, the implementation of that needs to be suprisingly
complex to support creation and deletion of snapshots and clones of
esnap clones, inflation, and backward compatibility.

For now, it is best to put in a restriction on the esnap clone size
during creation so as to not hit problems long after creation. Since
lvols are generally expected to be large relative to the cluster size,
it is somewhat unlikely that this restriction will be a significant
limitation.

Signed-off-by: Mike Gerdts <mgerdts@nvidia.com>
Change-Id: Id7a628f852a40c8ec2b7146504183943d723deba
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17607
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Mike Gerdts 2023-04-17 23:20:26 -05:00 committed by David Ko
parent dcd012e8d0
commit 57b47f209f
3 changed files with 49 additions and 19 deletions

View File

@ -224,11 +224,15 @@ void spdk_lvol_create_clone(struct spdk_lvol *lvol, const char *clone_name,
*
* \param esnap_id The identifier that will be passed to the spdk_bs_esnap_dev_create callback.
* \param id_len The length of esnap_id, in bytes.
* \param size_bytes The size of the external snapshot device, in bytes.
* \param size_bytes The size of the external snapshot device, in bytes. This must be an integer
* multiple of the lvolstore's cluster size. See \c cluster_sz in \struct spdk_lvs_opts.
* \param lvs Handle to lvolstore.
* \param clone_name Name of created clone.
* \param cb_fn Completion callback.
* \param cb_arg Completion callback custom arguments.
* \return 0 if parameters pass verification checks and the esnap creation is started, in which case
* the \c cb_fn will be used to report the completion status. If an error is encountered, a negative
* errno will be returned and \c cb_fn will not be called.
*/
int spdk_lvol_create_esnap_clone(const void *esnap_id, uint32_t id_len, uint64_t size_bytes,
struct spdk_lvol_store *lvs, const char *clone_name,

View File

@ -1238,6 +1238,7 @@ spdk_lvol_create_esnap_clone(const void *esnap_id, uint32_t id_len, uint64_t siz
struct spdk_blob_store *bs;
struct spdk_lvol *lvol;
struct spdk_blob_opts opts;
uint64_t cluster_sz;
char *xattr_names[] = {LVOL_NAME, "uuid"};
int rc;
@ -1253,6 +1254,14 @@ spdk_lvol_create_esnap_clone(const void *esnap_id, uint32_t id_len, uint64_t siz
bs = lvs->blobstore;
cluster_sz = spdk_bs_get_cluster_size(bs);
if ((size_bytes % cluster_sz) != 0) {
SPDK_ERRLOG("Cannot create '%s/%s': size %" PRIu64 " is not an integer multiple of "
"cluster size %" PRIu64 "\n", lvs->name, clone_name, size_bytes,
cluster_sz);
return -EINVAL;
}
req = calloc(1, sizeof(*req));
if (!req) {
SPDK_ERRLOG("Cannot alloc memory for lvol request pointer\n");
@ -1273,7 +1282,7 @@ spdk_lvol_create_esnap_clone(const void *esnap_id, uint32_t id_len, uint64_t siz
opts.esnap_id = esnap_id;
opts.esnap_id_len = id_len;
opts.thin_provision = true;
opts.num_clusters = spdk_divide_round_up(size_bytes, spdk_bs_get_cluster_size(bs));
opts.num_clusters = spdk_divide_round_up(size_bytes, cluster_sz);
opts.clear_method = lvol->clear_method;
opts.xattrs.count = SPDK_COUNTOF(xattr_names);
opts.xattrs.names = xattr_names;

View File

@ -2409,10 +2409,13 @@ lvol_esnap_create_bad_args(void)
struct ut_cb_res lvres1, lvres2;
struct spdk_lvol *lvol;
char uuid_str[SPDK_UUID_STRING_LEN];
uint64_t block_sz, cluster_sz;
init_dev(&dev);
block_sz = dev.bs_dev.blocklen;
spdk_lvs_opts_init(&opts);
cluster_sz = opts.cluster_sz;
snprintf(opts.name, sizeof(opts.name), "lvs");
opts.esnap_bs_dev_create = ut_esnap_bs_dev_create;
g_lvserrno = -1;
@ -2426,18 +2429,25 @@ lvol_esnap_create_bad_args(void)
MOCK_SET(spdk_bdev_get_by_name, &esnap_bdev);
/* error with lvs == NULL */
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), 1, NULL, "clone1",
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), cluster_sz, NULL, "clone1",
lvol_op_with_handle_complete, NULL);
CU_ASSERT(rc == -EINVAL);
/* error with clone name that is too short */
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), 1, g_lvol_store, "",
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), cluster_sz, g_lvol_store, "",
lvol_op_with_handle_complete, NULL);
CU_ASSERT(rc == -EINVAL);
/* error with clone name that is too long */
memset(long_name, 'a', sizeof(long_name));
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), 1, g_lvol_store, long_name,
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), cluster_sz, g_lvol_store,
long_name, lvol_op_with_handle_complete, NULL);
CU_ASSERT(rc == -EINVAL);
/* error with size that is not a multiple of an integer multiple of cluster_sz */
CU_ASSERT(((cluster_sz + block_sz) % cluster_sz) != 0);
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), cluster_sz + block_sz,
g_lvol_store, "clone1",
lvol_op_with_handle_complete, NULL);
CU_ASSERT(rc == -EINVAL);
@ -2447,8 +2457,8 @@ lvol_esnap_create_bad_args(void)
CU_ASSERT(g_lvserrno == 0);
SPDK_CU_ASSERT_FATAL(g_lvol != NULL);
lvol = g_lvol;
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), 1, g_lvol_store, "lvol",
lvol_op_with_handle_complete, NULL);
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), cluster_sz, g_lvol_store,
"lvol", lvol_op_with_handle_complete, NULL);
CU_ASSERT(rc == -EEXIST);
spdk_lvol_close(lvol, op_complete, ut_cb_res_clear(&lvres1));
spdk_lvol_destroy(lvol, op_complete, ut_cb_res_clear(&lvres2));
@ -2458,11 +2468,12 @@ lvol_esnap_create_bad_args(void)
g_lvol = NULL;
/* error when two clones created at the same time with the same name */
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), 1, g_lvol_store, "clone1",
lvol_op_with_handle_complete, ut_cb_res_clear(&lvres1));
CU_ASSERT(rc == 0);
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), 1, g_lvol_store, "clone1",
lvol_op_with_handle_complete, ut_cb_res_clear(&lvres2));
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), cluster_sz, g_lvol_store,
"clone1", lvol_op_with_handle_complete,
ut_cb_res_clear(&lvres1));
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), cluster_sz, g_lvol_store,
"clone1", lvol_op_with_handle_complete,
ut_cb_res_clear(&lvres2));
CU_ASSERT(rc == -EEXIST);
poll_threads();
CU_ASSERT(g_lvol != NULL);
@ -2493,11 +2504,13 @@ lvol_esnap_create_delete(void)
struct spdk_lvs_opts opts;
char uuid_str[SPDK_UUID_STRING_LEN];
int rc;
uint64_t cluster_sz;
init_dev(&dev);
init_dev(&g_esnap_dev);
spdk_lvs_opts_init(&opts);
cluster_sz = opts.cluster_sz;
snprintf(opts.name, sizeof(opts.name), "lvs");
opts.esnap_bs_dev_create = ut_esnap_bs_dev_create;
g_lvserrno = -1;
@ -2510,8 +2523,8 @@ lvol_esnap_create_delete(void)
init_bdev(&esnap_bdev, "bdev1", BS_CLUSTER_SIZE);
CU_ASSERT(spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &esnap_bdev.uuid) == 0);
MOCK_SET(spdk_bdev_get_by_name, &esnap_bdev);
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), 1, g_lvol_store, "clone1",
lvol_op_with_handle_complete, NULL);
rc = spdk_lvol_create_esnap_clone(uuid_str, strlen(uuid_str), cluster_sz, g_lvol_store,
"clone1", lvol_op_with_handle_complete, NULL);
CU_ASSERT(rc == 0);
poll_threads();
CU_ASSERT(g_lvserrno == 0);
@ -2637,11 +2650,13 @@ lvol_esnap_missing(void)
const char *name1 = "lvol1";
const char *name2 = "lvol2";
char uuid_str[SPDK_UUID_STRING_LEN];
uint64_t cluster_sz;
int rc;
/* Create an lvstore */
init_dev(&dev);
spdk_lvs_opts_init(&opts);
cluster_sz = opts.cluster_sz;
snprintf(opts.name, sizeof(opts.name), "lvs");
g_lvserrno = -1;
rc = spdk_lvs_init(&dev.bs_dev, &opts, lvol_store_op_with_handle_complete, NULL);
@ -2665,8 +2680,9 @@ lvol_esnap_missing(void)
init_bdev(&esnap_bdev, "bdev1", BS_CLUSTER_SIZE);
CU_ASSERT(spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &esnap_bdev.uuid) == 0);
MOCK_SET(spdk_bdev_get_by_name, &esnap_bdev);
rc = spdk_lvol_create_esnap_clone(uuid_str, sizeof(uuid_str), 1, g_lvol_store, name1,
lvol_op_with_handle_complete, ut_cb_res_clear(&cb_res));
rc = spdk_lvol_create_esnap_clone(uuid_str, sizeof(uuid_str), cluster_sz, g_lvol_store,
name1, lvol_op_with_handle_complete,
ut_cb_res_clear(&cb_res));
CU_ASSERT(rc == -EEXIST);
CU_ASSERT(ut_cb_res_untouched(&cb_res));
MOCK_CLEAR(spdk_bdev_get_by_name);
@ -2680,8 +2696,9 @@ lvol_esnap_missing(void)
/* Using a unique lvol name allows the clone to be created. */
MOCK_SET(spdk_bdev_get_by_name, &esnap_bdev);
MOCK_SET(spdk_blob_is_esnap_clone, true);
rc = spdk_lvol_create_esnap_clone(uuid_str, sizeof(uuid_str), 1, g_lvol_store, name2,
lvol_op_with_handle_complete, ut_cb_res_clear(&cb_res));
rc = spdk_lvol_create_esnap_clone(uuid_str, sizeof(uuid_str), cluster_sz, g_lvol_store,
name2, lvol_op_with_handle_complete,
ut_cb_res_clear(&cb_res));
SPDK_CU_ASSERT_FATAL(rc == 0);
CU_ASSERT(cb_res.err == 0);
SPDK_CU_ASSERT_FATAL(cb_res.data != NULL);
@ -2925,7 +2942,7 @@ lvol_esnap_hotplug_scenario(struct hotplug_lvol *hotplug_lvols,
g_lvserrno = 0xbad;
rc = spdk_lvol_create_esnap_clone(hp_lvol->esnap_id, hp_lvol->id_len,
1, lvs, hp_lvol->lvol_name,
opts.cluster_sz, lvs, hp_lvol->lvol_name,
lvol_op_with_handle_complete, NULL);
CU_ASSERT(rc == 0);
poll_threads();