lvol: lvol destruction race leads to null deref
As an lvolstore is being destroyed, _vbdev_lvs_remove() starts an interation through the lvols to delete each one, ultimately leading to the destruction of the lvolstore with a call to lvs_free(). The callback passed to vbdev_lvs_destruct() is always called asynchronously via spdk_io_device_unregister() in bs_free(). When the lvolstore resides on bdevs that perform async IO (i.e. most bdevs other than malloc), this gives a small window when the lvol bdev is not registered but a lookup with spdk_lvol_get_by_uuid() or spdk_lvol_get_by_names() will succeed. If rpc_bdev_lvol_delete() runs during this window, it can get a reference to an lvol that has just been unregistered and lvol->blob may be NULL. This lvol is then passed to vbdev_lvol_destroy(). Before this fix, vbdev_lvol_destroy() would call: spdk_blob_is_degraded(lvol->blob); Which would then lead to a NULL pointer dereference, as spdk_blob_is_degraded() assumes a valid blob is passed. While a NULL check would avoid this particular problem, a NULL blob is not necessarily caused by the condition described above. It would better to flag the lvstore's destruction before returning from vbdev_lvs_destruct() and use that flag to prevent operations on the lvolstore that is being deleted. Such a flag already exists in the form of 'lvs_bdev->req != NULL', but that is set too late to close this race. This fix introduces lvs_bdev->removal_in_progress which is set prior to returning from vbdev_lvs_unload() and vbdev_lvs_destruct(). It is checked by vbdev_lvol_destroy() before trying to destroy the lvol. Now, any lvol destruction initiated by something other than vbdev_lvs_destruct() while an lvolstore unload or destroy is in progress will fail with -ENODEV. Fixes issue: #2998 Signed-off-by: Mike Gerdts <mgerdts@nvidia.com> Change-Id: I4d861879097703b0d8e3180e6de7ad6898f340fd Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17891 Community-CI: Mellanox Build Bot Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
This commit is contained in:
parent
ee5ea993a0
commit
b240c2b103
@ -41,6 +41,8 @@ struct spdk_bdev_module g_lvol_if = {
|
||||
|
||||
SPDK_BDEV_MODULE_REGISTER(lvol, &g_lvol_if)
|
||||
|
||||
static void _vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg);
|
||||
|
||||
struct lvol_store_bdev *
|
||||
vbdev_get_lvs_bdev_by_lvs(struct spdk_lvol_store *lvs_orig)
|
||||
{
|
||||
@ -50,8 +52,11 @@ vbdev_get_lvs_bdev_by_lvs(struct spdk_lvol_store *lvs_orig)
|
||||
while (lvs_bdev != NULL) {
|
||||
lvs = lvs_bdev->lvs;
|
||||
if (lvs == lvs_orig) {
|
||||
if (lvs_bdev->req != NULL) {
|
||||
/* We do not allow access to lvs that are being destroyed */
|
||||
if (lvs_bdev->removal_in_progress) {
|
||||
/* We do not allow access to lvs that are being unloaded or
|
||||
* destroyed */
|
||||
SPDK_DEBUGLOG(vbdev_lvol, "lvs %s: removal in progress\n",
|
||||
lvs_orig->name);
|
||||
return NULL;
|
||||
} else {
|
||||
return lvs_bdev;
|
||||
@ -120,8 +125,11 @@ vbdev_get_lvs_bdev_by_bdev(struct spdk_bdev *bdev_orig)
|
||||
|
||||
while (lvs_bdev != NULL) {
|
||||
if (lvs_bdev->bdev == bdev_orig) {
|
||||
if (lvs_bdev->req != NULL) {
|
||||
/* We do not allow access to lvs that are being destroyed */
|
||||
if (lvs_bdev->removal_in_progress) {
|
||||
/* We do not allow access to lvs that are being unloaded or
|
||||
* destroyed */
|
||||
SPDK_DEBUGLOG(vbdev_lvol, "lvs %s: removal in progress\n",
|
||||
lvs_bdev->lvs->name);
|
||||
return NULL;
|
||||
} else {
|
||||
return lvs_bdev;
|
||||
@ -359,7 +367,7 @@ _vbdev_lvs_remove_lvol_cb(void *cb_arg, int lvolerrno)
|
||||
lvol = TAILQ_FIRST(&lvs->lvols);
|
||||
while (lvol != NULL) {
|
||||
if (spdk_lvol_deletable(lvol)) {
|
||||
vbdev_lvol_destroy(lvol, _vbdev_lvs_remove_lvol_cb, lvs_bdev);
|
||||
_vbdev_lvol_destroy(lvol, _vbdev_lvs_remove_lvol_cb, lvs_bdev);
|
||||
return;
|
||||
}
|
||||
lvol = TAILQ_NEXT(lvol, link);
|
||||
@ -425,6 +433,8 @@ _vbdev_lvs_remove(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void
|
||||
return;
|
||||
}
|
||||
|
||||
lvs_bdev->removal_in_progress = true;
|
||||
|
||||
req->cb_fn = cb_fn;
|
||||
req->cb_arg = cb_arg;
|
||||
lvs_bdev->req = req;
|
||||
@ -611,8 +621,8 @@ _vbdev_lvol_destroy_cb(void *cb_arg, int bdeverrno)
|
||||
free(ctx);
|
||||
}
|
||||
|
||||
void
|
||||
vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg)
|
||||
static void
|
||||
_vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg)
|
||||
{
|
||||
struct vbdev_lvol_destroy_ctx *ctx;
|
||||
size_t count;
|
||||
@ -620,6 +630,10 @@ vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb
|
||||
assert(lvol != NULL);
|
||||
assert(cb_fn != NULL);
|
||||
|
||||
/* Callers other than _vbdev_lvs_remove() must ensure the lvstore is not being removed. */
|
||||
assert(cb_fn == _vbdev_lvs_remove_lvol_cb ||
|
||||
vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store) != NULL);
|
||||
|
||||
/* Check if it is possible to delete lvol */
|
||||
spdk_blob_get_clones(lvol->lvol_store->blobstore, lvol->blob_id, NULL, &count);
|
||||
if (count > 1) {
|
||||
@ -647,6 +661,26 @@ vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb
|
||||
spdk_bdev_unregister(lvol->bdev, _vbdev_lvol_destroy_cb, ctx);
|
||||
}
|
||||
|
||||
void
|
||||
vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg)
|
||||
{
|
||||
struct lvol_store_bdev *lvs_bdev;
|
||||
|
||||
/*
|
||||
* During destruction of an lvolstore, _vbdev_lvs_unload() iterates through lvols until they
|
||||
* are all deleted. There may be some IO required
|
||||
*/
|
||||
lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store);
|
||||
if (lvs_bdev == NULL) {
|
||||
SPDK_DEBUGLOG(vbdev_lvol, "lvol %s: lvolstore is being removed\n",
|
||||
lvol->unique_id);
|
||||
cb_fn(cb_arg, -ENODEV);
|
||||
return;
|
||||
}
|
||||
|
||||
_vbdev_lvol_destroy(lvol, cb_fn, cb_arg);
|
||||
}
|
||||
|
||||
static char *
|
||||
vbdev_lvol_find_name(struct spdk_lvol *lvol, spdk_blob_id blob_id)
|
||||
{
|
||||
@ -1086,6 +1120,7 @@ _create_lvol_disk(struct spdk_lvol *lvol, bool destroy)
|
||||
lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store);
|
||||
if (lvs_bdev == NULL) {
|
||||
SPDK_ERRLOG("No spdk lvs-bdev pair found for lvol %s\n", lvol->unique_id);
|
||||
assert(false);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@ struct lvol_store_bdev {
|
||||
struct spdk_lvol_store *lvs;
|
||||
struct spdk_bdev *bdev;
|
||||
struct spdk_lvs_req *req;
|
||||
bool removal_in_progress;
|
||||
|
||||
TAILQ_ENTRY(lvol_store_bdev) lvol_stores;
|
||||
};
|
||||
|
@ -672,6 +672,91 @@ esnap_remove_degraded(void)
|
||||
CU_ASSERT(rc == 0);
|
||||
}
|
||||
|
||||
static void
|
||||
late_delete(void)
|
||||
{
|
||||
char aiopath[PATH_MAX];
|
||||
struct spdk_lvol_store *lvs = NULL;
|
||||
const uint32_t bs_size_bytes = 10 * 1024 * 1024;
|
||||
const uint32_t bs_block_size = 4096;
|
||||
const uint32_t cluster_size = 32 * 1024;
|
||||
struct op_with_handle_data owh_data;
|
||||
struct lvol_bdev *lvol_bdev;
|
||||
struct spdk_lvol *lvol;
|
||||
int rc, rc2;
|
||||
int count;
|
||||
|
||||
/* Create device for lvstore. This cannot be a malloc bdev because we need to sneak a
|
||||
* deletion in while blob_persist() is in progress.
|
||||
*/
|
||||
rc = make_test_file(bs_size_bytes, aiopath, sizeof(aiopath), "late_delete.aio");
|
||||
SPDK_CU_ASSERT_FATAL(rc == 0);
|
||||
rc = create_aio_bdev("aio1", aiopath, bs_block_size, false);
|
||||
SPDK_CU_ASSERT_FATAL(rc == 0);
|
||||
poll_threads();
|
||||
|
||||
/* Create lvstore */
|
||||
rc = vbdev_lvs_create("aio1", "lvs1", cluster_size, 0, 0,
|
||||
lvs_op_with_handle_cb, clear_owh(&owh_data));
|
||||
SPDK_CU_ASSERT_FATAL(rc == 0);
|
||||
poll_error_updated(&owh_data.lvserrno);
|
||||
SPDK_CU_ASSERT_FATAL(owh_data.lvserrno == 0);
|
||||
SPDK_CU_ASSERT_FATAL(owh_data.u.lvs != NULL);
|
||||
lvs = owh_data.u.lvs;
|
||||
|
||||
/* Create an lvol */
|
||||
vbdev_lvol_create(lvs, "lvol", 1, true, LVOL_CLEAR_WITH_DEFAULT,
|
||||
lvol_op_with_handle_cb, clear_owh(&owh_data));
|
||||
poll_error_updated(&owh_data.lvserrno);
|
||||
CU_ASSERT(owh_data.lvserrno == 0);
|
||||
CU_ASSERT(owh_data.u.lvol != NULL);
|
||||
|
||||
/* Verify the lvol can be found both ways */
|
||||
CU_ASSERT(spdk_bdev_get_by_name("lvs1/lvol") != NULL);
|
||||
CU_ASSERT(spdk_lvol_get_by_names("lvs1", "lvol") != NULL);
|
||||
|
||||
/*
|
||||
* Once the lvolstore deletion starts, it will briefly be possible to look up lvol bdevs and
|
||||
* degraded lvols in that lvolstore but any attempt to delete them will fail with -ENODEV.
|
||||
*/
|
||||
rc = 0xbad;
|
||||
count = 0;
|
||||
vbdev_lvs_destruct(lvs, lvol_op_complete_cb, &rc);
|
||||
do {
|
||||
lvol_bdev = (struct lvol_bdev *)spdk_bdev_get_by_name("lvs1/lvol");
|
||||
if (lvol_bdev != NULL) {
|
||||
rc2 = 0xbad;
|
||||
vbdev_lvol_destroy(lvol_bdev->lvol, lvol_op_complete_cb, &rc2);
|
||||
CU_ASSERT(rc2 == -ENODEV);
|
||||
}
|
||||
lvol = spdk_lvol_get_by_names("lvs1", "lvol");
|
||||
if (lvol != NULL) {
|
||||
/* If we are here, we are likely reproducing #2998 */
|
||||
rc2 = 0xbad;
|
||||
vbdev_lvol_destroy(lvol, lvol_op_complete_cb, &rc2);
|
||||
CU_ASSERT(rc2 == -ENODEV);
|
||||
count++;
|
||||
}
|
||||
|
||||
spdk_thread_poll(g_ut_threads[0].thread, 0, 0);
|
||||
} while (rc == 0xbad);
|
||||
|
||||
/* Ensure that the test exercised the race */
|
||||
CU_ASSERT(count != 0);
|
||||
|
||||
/* The lvol is now gone */
|
||||
CU_ASSERT(spdk_bdev_get_by_name("lvs1/lvol") == NULL);
|
||||
CU_ASSERT(spdk_lvol_get_by_names("lvs1", "lvol") == NULL);
|
||||
|
||||
/* Clean up */
|
||||
rc = 0xbad;
|
||||
bdev_aio_delete("aio1", unregister_cb, &rc);
|
||||
poll_error_updated(&rc);
|
||||
CU_ASSERT(rc == 0);
|
||||
rc = unlink(aiopath);
|
||||
CU_ASSERT(rc == 0);
|
||||
}
|
||||
|
||||
static void
|
||||
bdev_init_cb(void *arg, int rc)
|
||||
{
|
||||
@ -706,6 +791,7 @@ main(int argc, char **argv)
|
||||
CU_ADD_TEST(suite, esnap_clone_io);
|
||||
CU_ADD_TEST(suite, esnap_hotplug);
|
||||
CU_ADD_TEST(suite, esnap_remove_degraded);
|
||||
CU_ADD_TEST(suite, late_delete);
|
||||
|
||||
allocate_threads(2);
|
||||
set_thread(0);
|
||||
|
Loading…
Reference in New Issue
Block a user