bdev: call examine_disk() for all claim holders

If multiple claims exist on a bdev, examine_disk() is called for each of
them.

Change-Id: I0a6dc3e4bd1da20bbcbddf97a16e04c62c82354c
Signed-off-by: Mike Gerdts <mgerdts@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15290
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
This commit is contained in:
Mike Gerdts 2022-10-28 14:32:09 -05:00 committed by Tomasz Zawadzki
parent 47bb651cd5
commit 86bbcdb8f6
3 changed files with 294 additions and 8 deletions

View File

@ -625,6 +625,13 @@ struct spdk_bdev {
/** The bdev status */
enum spdk_bdev_status status;
/**
* How many bdev_examine() calls are iterating claim.v2.claims. When non-zero claims
* that are released will be cleared but remain on the claims list until
* bdev_examine() finishes. Must hold spinlock on all updates.
*/
uint32_t examine_in_progress;
/**
* The claim type: used in conjunction with claim. Must hold spinlock on all
* updates.

View File

@ -396,6 +396,7 @@ static bool bdev_abort_buf_io(struct spdk_bdev_mgmt_channel *ch, struct spdk_bde
static bool claim_type_is_v2(enum spdk_bdev_claim_type type);
static void bdev_desc_release_claims(struct spdk_bdev_desc *desc);
static void claim_reset(struct spdk_bdev *bdev);
void
spdk_bdev_get_opts(struct spdk_bdev_opts *opts, size_t opts_size)
@ -662,6 +663,7 @@ static void
bdev_examine(struct spdk_bdev *bdev)
{
struct spdk_bdev_module *module;
struct spdk_bdev_module_claim *claim, *tmpclaim;
uint32_t action;
if (!bdev_ok_to_examine(bdev)) {
@ -711,7 +713,53 @@ bdev_examine(struct spdk_bdev *bdev)
}
break;
default:
break;
/* Examine by all bdev modules with a v2 claim */
assert(claim_type_is_v2(bdev->internal.claim_type));
/*
* Removal of tailq nodes while iterating can cause the iteration to jump out of the
* list, perhaps accessing freed memory. Without protection, this could happen
* while the lock is dropped during the examine callback.
*/
bdev->internal.examine_in_progress++;
TAILQ_FOREACH(claim, &bdev->internal.claim.v2.claims, link) {
module = claim->module;
if (module == NULL) {
/* This is a vestigial claim, held by examine_count */
continue;
}
if (module->examine_disk == NULL) {
continue;
}
spdk_spin_lock(&module->internal.spinlock);
module->internal.action_in_progress++;
spdk_spin_unlock(&module->internal.spinlock);
/* Call examine_disk without holding internal.spinlock. */
spdk_spin_unlock(&bdev->internal.spinlock);
module->examine_disk(bdev);
spdk_spin_lock(&bdev->internal.spinlock);
}
assert(bdev->internal.examine_in_progress > 0);
bdev->internal.examine_in_progress--;
if (bdev->internal.examine_in_progress == 0) {
/* Remove any claims that were released during examine_disk */
TAILQ_FOREACH_SAFE(claim, &bdev->internal.claim.v2.claims, link, tmpclaim) {
if (claim->desc != NULL) {
continue;
}
TAILQ_REMOVE(&bdev->internal.claim.v2.claims, claim, link);
free(claim);
}
if (TAILQ_EMPTY(&bdev->internal.claim.v2.claims)) {
claim_reset(bdev);
}
}
}
spdk_spin_unlock(&bdev->internal.spinlock);
@ -7749,13 +7797,18 @@ bdev_desc_release_claims(struct spdk_bdev_desc *desc)
assert(spdk_spin_held(&bdev->internal.spinlock));
assert(claim_type_is_v2(bdev->internal.claim_type));
TAILQ_REMOVE(&bdev->internal.claim.v2.claims, desc->claim, link);
free(desc->claim);
desc->claim = NULL;
if (TAILQ_EMPTY(&bdev->internal.claim.v2.claims)) {
claim_reset(bdev);
if (bdev->internal.examine_in_progress == 0) {
TAILQ_REMOVE(&bdev->internal.claim.v2.claims, desc->claim, link);
free(desc->claim);
if (TAILQ_EMPTY(&bdev->internal.claim.v2.claims)) {
claim_reset(bdev);
}
} else {
/* This is a dead claim that will be cleaned up when bdev_examine() is done. */
desc->claim->module = NULL;
desc->claim->desc = NULL;
}
desc->claim = NULL;
}
/*

View File

@ -6231,11 +6231,31 @@ examine_no_lock_held(struct spdk_bdev *bdev)
CU_ASSERT(!spdk_spin_held(&bdev->internal.spinlock));
}
struct examine_claim_v2_ctx {
struct ut_examine_ctx examine_ctx;
enum spdk_bdev_claim_type claim_type;
struct spdk_bdev_desc *desc;
};
static void
examine_claim_v2(struct spdk_bdev *bdev)
{
struct examine_claim_v2_ctx *ctx = bdev->ctxt;
int rc;
rc = spdk_bdev_open_ext(bdev->name, false, bdev_ut_event_cb, NULL, &ctx->desc);
CU_ASSERT(rc == 0);
rc = spdk_bdev_module_claim_bdev_desc(ctx->desc, ctx->claim_type, NULL, &vbdev_ut_if);
CU_ASSERT(rc == 0);
}
static void
examine_locks(void)
{
struct spdk_bdev *bdev;
struct ut_examine_ctx ctx = { 0 };
struct examine_claim_v2_ctx v2_ctx;
/* Without any claims, one code path is taken */
ctx.examine_config = examine_no_lock_held;
@ -6247,7 +6267,7 @@ examine_locks(void)
CU_ASSERT(bdev->internal.claim.v1.module == NULL);
free_bdev(bdev);
/* Exercise the other path that is taken when examine_config() takes a claim. */
/* Exercise another path that is taken when examine_config() takes a v1 claim. */
memset(&ctx, 0, sizeof(ctx));
ctx.examine_config = examine_claim_v1;
ctx.examine_disk = examine_no_lock_held;
@ -6260,6 +6280,19 @@ examine_locks(void)
CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
CU_ASSERT(bdev->internal.claim.v1.module == NULL);
free_bdev(bdev);
/* Exercise the final path that comes with v2 claims. */
memset(&v2_ctx, 0, sizeof(v2_ctx));
v2_ctx.examine_ctx.examine_config = examine_claim_v2;
v2_ctx.examine_ctx.examine_disk = examine_no_lock_held;
v2_ctx.claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
bdev = allocate_bdev_ctx("bdev0", &v2_ctx);
CU_ASSERT(v2_ctx.examine_ctx.examine_config_count == 1);
CU_ASSERT(v2_ctx.examine_ctx.examine_disk_count == 1);
CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE);
spdk_bdev_close(v2_ctx.desc);
CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
free_bdev(bdev);
}
#define UT_ASSERT_CLAIM_V2_COUNT(bdev, expect) \
@ -6809,6 +6842,198 @@ claim_v1_existing_v2(void)
free_bdev(bdev);
}
static void ut_examine_claimed_config0(struct spdk_bdev *bdev);
static void ut_examine_claimed_disk0(struct spdk_bdev *bdev);
static void ut_examine_claimed_config1(struct spdk_bdev *bdev);
static void ut_examine_claimed_disk1(struct spdk_bdev *bdev);
#define UT_MAX_EXAMINE_MODS 2
struct spdk_bdev_module examine_claimed_mods[UT_MAX_EXAMINE_MODS] = {
{
.name = "vbdev_ut_examine0",
.module_init = vbdev_ut_module_init,
.module_fini = vbdev_ut_module_fini,
.examine_config = ut_examine_claimed_config0,
.examine_disk = ut_examine_claimed_disk0,
},
{
.name = "vbdev_ut_examine1",
.module_init = vbdev_ut_module_init,
.module_fini = vbdev_ut_module_fini,
.examine_config = ut_examine_claimed_config1,
.examine_disk = ut_examine_claimed_disk1,
}
};
SPDK_BDEV_MODULE_REGISTER(bdev_ut_claimed0, &examine_claimed_mods[0])
SPDK_BDEV_MODULE_REGISTER(bdev_ut_claimed1, &examine_claimed_mods[1])
struct ut_examine_claimed_ctx {
uint32_t examine_config_count;
uint32_t examine_disk_count;
/* Claim type to take, with these options */
enum spdk_bdev_claim_type claim_type;
struct spdk_bdev_claim_opts claim_opts;
/* Expected return value from spdk_bdev_module_claim_bdev_desc() */
int expect_claim_err;
/* Descriptor used for a claim */
struct spdk_bdev_desc *desc;
} examine_claimed_ctx[UT_MAX_EXAMINE_MODS];
bool ut_testing_examine_claimed;
static void
reset_examine_claimed_ctx(void)
{
struct ut_examine_claimed_ctx *ctx;
uint32_t i;
for (i = 0; i < SPDK_COUNTOF(examine_claimed_ctx); i++) {
ctx = &examine_claimed_ctx[i];
if (ctx->desc != NULL) {
spdk_bdev_close(ctx->desc);
}
memset(ctx, 0, sizeof(*ctx));
spdk_bdev_claim_opts_init(&ctx->claim_opts, sizeof(ctx->claim_opts));
}
}
static void
examine_claimed_config(struct spdk_bdev *bdev, uint32_t modnum)
{
SPDK_CU_ASSERT_FATAL(modnum < UT_MAX_EXAMINE_MODS);
struct spdk_bdev_module *module = &examine_claimed_mods[modnum];
struct ut_examine_claimed_ctx *ctx = &examine_claimed_ctx[modnum];
int rc;
if (!ut_testing_examine_claimed) {
spdk_bdev_module_examine_done(module);
return;
}
ctx->examine_config_count++;
if (ctx->claim_type != SPDK_BDEV_CLAIM_NONE) {
rc = spdk_bdev_open_ext(bdev->name, false, bdev_ut_event_cb, &ctx->claim_opts,
&ctx->desc);
CU_ASSERT(rc == 0);
rc = spdk_bdev_module_claim_bdev_desc(ctx->desc, ctx->claim_type, NULL, module);
CU_ASSERT(rc == ctx->expect_claim_err);
}
spdk_bdev_module_examine_done(module);
}
static void
ut_examine_claimed_config0(struct spdk_bdev *bdev)
{
examine_claimed_config(bdev, 0);
}
static void
ut_examine_claimed_config1(struct spdk_bdev *bdev)
{
examine_claimed_config(bdev, 1);
}
static void
examine_claimed_disk(struct spdk_bdev *bdev, uint32_t modnum)
{
SPDK_CU_ASSERT_FATAL(modnum < UT_MAX_EXAMINE_MODS);
struct spdk_bdev_module *module = &examine_claimed_mods[modnum];
struct ut_examine_claimed_ctx *ctx = &examine_claimed_ctx[modnum];
if (!ut_testing_examine_claimed) {
spdk_bdev_module_examine_done(module);
return;
}
ctx->examine_disk_count++;
spdk_bdev_module_examine_done(module);
}
static void
ut_examine_claimed_disk0(struct spdk_bdev *bdev)
{
examine_claimed_disk(bdev, 0);
}
static void
ut_examine_claimed_disk1(struct spdk_bdev *bdev)
{
examine_claimed_disk(bdev, 1);
}
static void
examine_claimed(void)
{
struct spdk_bdev *bdev;
struct spdk_bdev_module *mod = examine_claimed_mods;
struct ut_examine_claimed_ctx *ctx = examine_claimed_ctx;
ut_testing_examine_claimed = true;
reset_examine_claimed_ctx();
/*
* With one module claiming, both modules' examine_config should be called, but only the
* claiming module's examine_disk should be called.
*/
ctx[0].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
bdev = allocate_bdev("bdev0");
CU_ASSERT(ctx[0].examine_config_count == 1);
CU_ASSERT(ctx[0].examine_disk_count == 1);
SPDK_CU_ASSERT_FATAL(ctx[0].desc != NULL);
CU_ASSERT(ctx[0].desc->claim->module == &mod[0]);
CU_ASSERT(ctx[1].examine_config_count == 1);
CU_ASSERT(ctx[1].examine_disk_count == 0);
CU_ASSERT(ctx[1].desc == NULL);
reset_examine_claimed_ctx();
free_bdev(bdev);
/*
* With two modules claiming, both modules' examine_config and examine_disk should be
* called.
*/
ctx[0].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
ctx[1].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
bdev = allocate_bdev("bdev0");
CU_ASSERT(ctx[0].examine_config_count == 1);
CU_ASSERT(ctx[0].examine_disk_count == 1);
SPDK_CU_ASSERT_FATAL(ctx[0].desc != NULL);
CU_ASSERT(ctx[0].desc->claim->module == &mod[0]);
CU_ASSERT(ctx[1].examine_config_count == 1);
CU_ASSERT(ctx[1].examine_disk_count == 1);
SPDK_CU_ASSERT_FATAL(ctx[1].desc != NULL);
CU_ASSERT(ctx[1].desc->claim->module == &mod[1]);
reset_examine_claimed_ctx();
free_bdev(bdev);
/*
* If two vbdev modules try to claim with conflicting claim types, the module that was added
* last wins. The winner gets the claim and is the only one that has its examine_disk
* callback invoked.
*/
ctx[0].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
ctx[0].expect_claim_err = -EPERM;
ctx[1].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE;
bdev = allocate_bdev("bdev0");
CU_ASSERT(ctx[0].examine_config_count == 1);
CU_ASSERT(ctx[0].examine_disk_count == 0);
CU_ASSERT(ctx[1].examine_config_count == 1);
CU_ASSERT(ctx[1].examine_disk_count == 1);
SPDK_CU_ASSERT_FATAL(ctx[1].desc != NULL);
CU_ASSERT(ctx[1].desc->claim->module == &mod[1]);
CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE);
reset_examine_claimed_ctx();
free_bdev(bdev);
ut_testing_examine_claimed = false;
}
int
main(int argc, char **argv)
{
@ -6878,6 +7103,7 @@ main(int argc, char **argv)
CU_ADD_TEST(suite, claim_v2_existing_writer);
CU_ADD_TEST(suite, claim_v2_existing_v1);
CU_ADD_TEST(suite, claim_v1_existing_v2);
CU_ADD_TEST(suite, examine_claimed);
allocate_cores(1);
allocate_threads(1);