From 4473942f46ef6c83c3bc0ad1dc86df12131a83a9 Mon Sep 17 00:00:00 2001 From: Pawel Wodkowski Date: Fri, 23 Mar 2018 20:35:21 +0100 Subject: [PATCH] bdev: replace tailq by arrays in base and vbdev linking SPKD base bdev might be part of multiple vbdevs. The same is true in reverse direction. So consider folowing scenario: bdev3 bdev4 bdev5 | | | +-+--+ + +--+--+ / \ | / \ bdev0 bdev1 bdev2 In current implementation bdev0/1/2 will apear as base base for bdev3/4/5 which is obviously wrong. This patch try to address this issue. Change-Id: Ic99c13c8656ceb597aba7e41ccb2fa8090b4f13b Signed-off-by: Pawel Wodkowski Reviewed-on: https://review.gerrithub.io/405104 Reviewed-by: Shuhei Matsumoto Reviewed-by: Dariusz Stojaczyk Tested-by: SPDK Automated Test System Reviewed-by: Daniel Verkamp --- include/spdk_internal/bdev.h | 13 +- lib/bdev/bdev.c | 152 ++++++++++++++++++++---- test/unit/lib/bdev/bdev.c/bdev_ut.c | 177 ++++++++++++++++++++-------- test/unit/lib/bdev/part.c/part_ut.c | 5 +- 4 files changed, 266 insertions(+), 81 deletions(-) diff --git a/include/spdk_internal/bdev.h b/include/spdk_internal/bdev.h index fc29fe471..39d434a1c 100644 --- a/include/spdk_internal/bdev.h +++ b/include/spdk_internal/bdev.h @@ -282,15 +282,14 @@ struct spdk_bdev { /** The bdev status */ enum spdk_bdev_status status; - /** The list of block devices that this block device is built on top of (if any). */ - TAILQ_HEAD(, spdk_bdev) base_bdevs; + /** The array of block devices that this block device is built on top of (if any). */ + struct spdk_bdev **base_bdevs; + size_t base_bdevs_cnt; - TAILQ_ENTRY(spdk_bdev) base_bdev_link; - /** The list of virtual block devices built on top of this block device. */ - TAILQ_HEAD(, spdk_bdev) vbdevs; - - TAILQ_ENTRY(spdk_bdev) vbdev_link; + /** The array of virtual block devices built on top of this block device. */ + struct spdk_bdev **vbdevs; + size_t vbdevs_cnt; /** * Pointer to the module that has claimed this bdev for purposes of creating virtual diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 0c147ce5b..6afa47980 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -2350,10 +2350,8 @@ spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io) } static int -_spdk_bdev_register(struct spdk_bdev *bdev) +spdk_bdev_init(struct spdk_bdev *bdev) { - struct spdk_bdev_module *module; - assert(bdev->module != NULL); if (!bdev->name) { @@ -2370,9 +2368,6 @@ _spdk_bdev_register(struct spdk_bdev *bdev) TAILQ_INIT(&bdev->open_descs); - TAILQ_INIT(&bdev->vbdevs); - TAILQ_INIT(&bdev->base_bdevs); - TAILQ_INIT(&bdev->aliases); bdev->reset_in_progress = NULL; @@ -2382,6 +2377,22 @@ _spdk_bdev_register(struct spdk_bdev *bdev) sizeof(struct spdk_bdev_channel)); pthread_mutex_init(&bdev->mutex, NULL); + return 0; +} + +static void +spdk_bdev_fini(struct spdk_bdev *bdev) +{ + pthread_mutex_destroy(&bdev->mutex); + + spdk_io_device_unregister(__bdev_to_io_dev(bdev), NULL); +} + +static void +spdk_bdev_start(struct spdk_bdev *bdev) +{ + struct spdk_bdev_module *module; + SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Inserting bdev %s into list\n", bdev->name); TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, link); @@ -2391,34 +2402,132 @@ _spdk_bdev_register(struct spdk_bdev *bdev) module->examine(bdev); } } - - return 0; } int spdk_bdev_register(struct spdk_bdev *bdev) { - return _spdk_bdev_register(bdev); + int rc = spdk_bdev_init(bdev); + + if (rc == 0) { + spdk_bdev_start(bdev); + } + + return rc; +} + +static void +spdk_vbdev_remove_base_bdevs(struct spdk_bdev *vbdev) +{ + struct spdk_bdev **bdevs; + struct spdk_bdev *base; + size_t i, j, k; + bool found; + + /* Iterate over base bdevs to remove vbdev from them. */ + for (i = 0; i < vbdev->base_bdevs_cnt; i++) { + found = false; + base = vbdev->base_bdevs[i]; + + for (j = 0; j < base->vbdevs_cnt; j++) { + if (base->vbdevs[j] != vbdev) { + continue; + } + + for (k = j; k + 1 < base->vbdevs_cnt; k++) { + base->vbdevs[k] = base->vbdevs[k + 1]; + } + + base->vbdevs_cnt--; + if (base->vbdevs_cnt > 0) { + bdevs = realloc(base->vbdevs, base->vbdevs_cnt * sizeof(bdevs[0])); + /* It would be odd if shrinking memory block fail. */ + assert(bdevs); + base->vbdevs = bdevs; + } else { + free(base->vbdevs); + base->vbdevs = NULL; + } + + found = true; + break; + } + + if (!found) { + SPDK_WARNLOG("Bdev '%s' is not base bdev of '%s'.\n", base->name, vbdev->name); + } + } + + free(vbdev->base_bdevs); + vbdev->base_bdevs = NULL; + vbdev->base_bdevs_cnt = 0; +} + +static int +spdk_vbdev_set_base_bdevs(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, size_t cnt) +{ + struct spdk_bdev **vbdevs; + struct spdk_bdev *base; + size_t i; + + /* Adding base bdevs isn't supported (yet?). */ + assert(vbdev->base_bdevs_cnt == 0); + + vbdev->base_bdevs = malloc(cnt * sizeof(vbdev->base_bdevs[0])); + if (!vbdev->base_bdevs) { + SPDK_ERRLOG("%s - realloc() failed\n", vbdev->name); + return -ENOMEM; + } + + memcpy(vbdev->base_bdevs, base_bdevs, cnt * sizeof(vbdev->base_bdevs[0])); + vbdev->base_bdevs_cnt = cnt; + + /* Iterate over base bdevs to add this vbdev to them. */ + for (i = 0; i < cnt; i++) { + base = vbdev->base_bdevs[i]; + + assert(base != NULL); + assert(base->claim_module != NULL); + + vbdevs = realloc(base->vbdevs, (base->vbdevs_cnt + 1) * sizeof(vbdevs[0])); + if (!vbdevs) { + SPDK_ERRLOG("%s - realloc() failed\n", base->name); + spdk_vbdev_remove_base_bdevs(vbdev); + return -ENOMEM; + } + + vbdevs[base->vbdevs_cnt] = vbdev; + base->vbdevs = vbdevs; + base->vbdevs_cnt++; + } + + return 0; } int spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count) { - int i, rc; + int rc; - rc = _spdk_bdev_register(vbdev); + rc = spdk_bdev_init(vbdev); if (rc) { return rc; } - for (i = 0; i < base_bdev_count; i++) { - assert(base_bdevs[i] != NULL); - assert(base_bdevs[i]->claim_module != NULL); - TAILQ_INSERT_TAIL(&vbdev->base_bdevs, base_bdevs[i], base_bdev_link); - TAILQ_INSERT_TAIL(&base_bdevs[i]->vbdevs, vbdev, vbdev_link); + if (base_bdev_count == 0) { + spdk_bdev_start(vbdev); + return 0; } + rc = spdk_vbdev_set_base_bdevs(vbdev, base_bdevs, base_bdev_count); + if (rc) { + spdk_bdev_fini(vbdev); + return rc; + } + + spdk_bdev_start(vbdev); return 0; + } void @@ -2443,17 +2552,12 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void struct spdk_bdev_desc *desc, *tmp; int rc; bool do_destruct = true; - struct spdk_bdev *base_bdev; SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Removing bdev %s from list\n", bdev->name); pthread_mutex_lock(&bdev->mutex); - if (!TAILQ_EMPTY(&bdev->base_bdevs)) { - TAILQ_FOREACH(base_bdev, &bdev->base_bdevs, base_bdev_link) { - TAILQ_REMOVE(&base_bdev->vbdevs, bdev, vbdev_link); - } - } + spdk_vbdev_remove_base_bdevs(bdev); bdev->status = SPDK_BDEV_STATUS_REMOVING; bdev->unregister_cb = cb_fn; @@ -2480,9 +2584,7 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link); pthread_mutex_unlock(&bdev->mutex); - pthread_mutex_destroy(&bdev->mutex); - - spdk_io_device_unregister(__bdev_to_io_dev(bdev), NULL); + spdk_bdev_fini(bdev); rc = bdev->fn_table->destruct(bdev->ctxt); if (rc < 0) { diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index 537462d2f..1fe07f78f 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -89,6 +89,45 @@ vbdev_ut_examine(struct spdk_bdev *bdev) spdk_bdev_module_examine_done(&vbdev_ut_if); } +static bool +is_vbdev(struct spdk_bdev *base, struct spdk_bdev *vbdev) +{ + size_t i; + int found = 0; + + for (i = 0; i < base->vbdevs_cnt; i++) { + found += base->vbdevs[i] == vbdev; + } + + CU_ASSERT(found <= 1); + return !!found; +} + +static bool +is_base_bdev(struct spdk_bdev *base, struct spdk_bdev *vbdev) +{ + size_t i; + int found = 0; + + for (i = 0; i < vbdev->base_bdevs_cnt; i++) { + found += vbdev->base_bdevs[i] == base; + } + + CU_ASSERT(found <= 1); + return !!found; +} + +static bool +check_base_and_vbdev(struct spdk_bdev *base, struct spdk_bdev *vbdev) +{ + bool _is_vbdev = is_vbdev(base, vbdev); + bool _is_base = is_base_bdev(base, vbdev); + + CU_ASSERT(_is_vbdev == _is_base); + + return _is_base && _is_vbdev; +} + static struct spdk_bdev * allocate_bdev(char *name) { @@ -104,8 +143,8 @@ allocate_bdev(char *name) rc = spdk_bdev_register(bdev); CU_ASSERT(rc == 0); - CU_ASSERT(TAILQ_EMPTY(&bdev->base_bdevs)); - CU_ASSERT(TAILQ_EMPTY(&bdev->vbdevs)); + CU_ASSERT(bdev->base_bdevs_cnt == 0); + CU_ASSERT(bdev->vbdevs_cnt == 0); return bdev; } @@ -132,8 +171,14 @@ allocate_vbdev(char *name, struct spdk_bdev *base1, struct spdk_bdev *base2) rc = spdk_vbdev_register(bdev, array, base2 == NULL ? 1 : 2); CU_ASSERT(rc == 0); - CU_ASSERT(!TAILQ_EMPTY(&bdev->base_bdevs)); - CU_ASSERT(TAILQ_EMPTY(&bdev->vbdevs)); + CU_ASSERT(bdev->base_bdevs_cnt > 0); + CU_ASSERT(bdev->vbdevs_cnt == 0); + + CU_ASSERT(check_base_and_vbdev(base1, bdev) == true); + + if (base2) { + CU_ASSERT(check_base_and_vbdev(base2, bdev) == true); + } return bdev; } @@ -142,48 +187,55 @@ static void free_bdev(struct spdk_bdev *bdev) { spdk_bdev_unregister(bdev, NULL, NULL); + memset(bdev, 0xFF, sizeof(*bdev)); free(bdev); } static void free_vbdev(struct spdk_bdev *bdev) { - CU_ASSERT(!TAILQ_EMPTY(&bdev->base_bdevs)); + CU_ASSERT(bdev->base_bdevs_cnt != 0); spdk_bdev_unregister(bdev, NULL, NULL); + memset(bdev, 0xFF, sizeof(*bdev)); free(bdev); } static void open_write_test(void) { - struct spdk_bdev *bdev[8]; - struct spdk_bdev_desc *desc[8] = {}; + struct spdk_bdev *bdev[9]; + struct spdk_bdev_desc *desc[9] = {}; int rc; /* * Create a tree of bdevs to test various open w/ write cases. * - * bdev0 through bdev2 are physical block devices, such as NVMe + * bdev0 through bdev3 are physical block devices, such as NVMe * namespaces or Ceph block devices. * - * bdev3 is a virtual bdev with multiple base bdevs. This models + * bdev4 is a virtual bdev with multiple base bdevs. This models * caching or RAID use cases. * - * bdev4 through bdev6 are all virtual bdevs with the same base - * bdev. This models partitioning or logical volume use cases. + * bdev5 through bdev7 are all virtual bdevs with the same base + * bdev (except bdev7). This models partitioning or logical volume + * use cases. * - * bdev7 is a virtual bdev with multiple base bdevs, but these + * bdev7 is a virtual bdev with multiple base bdevs. One of base bdevs + * (bdev2) is shared with other virtual bdevs: bdev5 and bdev6. This + * models caching, RAID, partitioning or logical volumes use cases. + * + * bdev8 is a virtual bdev with multiple base bdevs, but these * base bdevs are themselves virtual bdevs. * - * bdev7 + * bdev8 * | * +----------+ * | | - * bdev3 bdev4 bdev5 bdev6 + * bdev4 bdev5 bdev6 bdev7 * | | | | - * +---+---+ +-------+-------+ - * | | | - * bdev0 bdev1 bdev2 + * +---+---+ +---+ + +---+---+ + * | | \ | / \ + * bdev0 bdev1 bdev2 bdev3 */ bdev[0] = allocate_bdev("bdev0"); @@ -198,18 +250,48 @@ open_write_test(void) rc = spdk_bdev_module_claim_bdev(bdev[2], NULL, &bdev_ut_if); CU_ASSERT(rc == 0); - bdev[3] = allocate_vbdev("bdev3", bdev[0], bdev[1]); + bdev[3] = allocate_bdev("bdev3"); rc = spdk_bdev_module_claim_bdev(bdev[3], NULL, &bdev_ut_if); CU_ASSERT(rc == 0); - bdev[4] = allocate_vbdev("bdev4", bdev[2], NULL); + bdev[4] = allocate_vbdev("bdev4", bdev[0], bdev[1]); rc = spdk_bdev_module_claim_bdev(bdev[4], NULL, &bdev_ut_if); CU_ASSERT(rc == 0); bdev[5] = allocate_vbdev("bdev5", bdev[2], NULL); + rc = spdk_bdev_module_claim_bdev(bdev[5], NULL, &bdev_ut_if); + CU_ASSERT(rc == 0); + bdev[6] = allocate_vbdev("bdev6", bdev[2], NULL); - bdev[7] = allocate_vbdev("bdev7", bdev[3], bdev[4]); + bdev[7] = allocate_vbdev("bdev7", bdev[2], bdev[3]); + + bdev[8] = allocate_vbdev("bdev8", bdev[4], bdev[5]); + + /* Check tree */ + CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[4]) == true); + CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[5]) == false); + CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[6]) == false); + CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[7]) == false); + CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[8]) == false); + + CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[4]) == true); + CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[5]) == false); + CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[6]) == false); + CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[7]) == false); + CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[8]) == false); + + CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[4]) == false); + CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[5]) == true); + CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[6]) == true); + CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[7]) == true); + CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[8]) == false); + + CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[4]) == false); + CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[5]) == false); + CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[6]) == false); + CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[7]) == true); + CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[8]) == false); /* Open bdev0 read-only. This should succeed. */ rc = spdk_bdev_open(bdev[0], false, NULL, NULL, &desc[0]); @@ -225,29 +307,7 @@ open_write_test(void) CU_ASSERT(rc == -EPERM); /* - * Open bdev3 read/write. This should fail since bdev3 has been claimed - * by a vbdev module. - */ - rc = spdk_bdev_open(bdev[3], true, NULL, NULL, &desc[3]); - CU_ASSERT(rc == -EPERM); - - /* Open bdev3 read-only. This should succeed. */ - rc = spdk_bdev_open(bdev[3], false, NULL, NULL, &desc[3]); - CU_ASSERT(rc == 0); - CU_ASSERT(desc[3] != NULL); - spdk_bdev_close(desc[3]); - - /* - * Open bdev7 read/write. This should succeed since it is a leaf - * bdev. - */ - rc = spdk_bdev_open(bdev[7], true, NULL, NULL, &desc[7]); - CU_ASSERT(rc == 0); - CU_ASSERT(desc[7] != NULL); - spdk_bdev_close(desc[7]); - - /* - * Open bdev4 read/write. This should fail since bdev4 has been claimed + * Open bdev4 read/write. This should fail since bdev3 has been claimed * by a vbdev module. */ rc = spdk_bdev_open(bdev[4], true, NULL, NULL, &desc[4]); @@ -259,17 +319,40 @@ open_write_test(void) CU_ASSERT(desc[4] != NULL); spdk_bdev_close(desc[4]); - free_vbdev(bdev[7]); + /* + * Open bdev8 read/write. This should succeed since it is a leaf + * bdev. + */ + rc = spdk_bdev_open(bdev[8], true, NULL, NULL, &desc[8]); + CU_ASSERT(rc == 0); + CU_ASSERT(desc[8] != NULL); + spdk_bdev_close(desc[8]); + + /* + * Open bdev5 read/write. This should fail since bdev4 has been claimed + * by a vbdev module. + */ + rc = spdk_bdev_open(bdev[5], true, NULL, NULL, &desc[5]); + CU_ASSERT(rc == -EPERM); + + /* Open bdev4 read-only. This should succeed. */ + rc = spdk_bdev_open(bdev[5], false, NULL, NULL, &desc[5]); + CU_ASSERT(rc == 0); + CU_ASSERT(desc[5] != NULL); + spdk_bdev_close(desc[5]); + + free_vbdev(bdev[8]); - free_vbdev(bdev[3]); - free_vbdev(bdev[4]); free_vbdev(bdev[5]); free_vbdev(bdev[6]); + free_vbdev(bdev[7]); + + free_vbdev(bdev[4]); free_bdev(bdev[0]); free_bdev(bdev[1]); free_bdev(bdev[2]); - + free_bdev(bdev[3]); } static void diff --git a/test/unit/lib/bdev/part.c/part_ut.c b/test/unit/lib/bdev/part.c/part_ut.c index 0d1ee44e0..3c94d7a1e 100644 --- a/test/unit/lib/bdev/part.c/part_ut.c +++ b/test/unit/lib/bdev/part.c/part_ut.c @@ -91,7 +91,8 @@ static void part_test(void) { struct spdk_bdev_part_base *base; - struct spdk_bdev_part part1, part2; + struct spdk_bdev_part part1 = {}; + struct spdk_bdev_part part2 = {}; struct spdk_bdev bdev_base = {}; SPDK_BDEV_PART_TAILQ tailq = TAILQ_HEAD_INITIALIZER(tailq); int rc; @@ -116,7 +117,7 @@ part_test(void) * The base device was removed - ensure that the partition vbdevs were * removed from the base's vbdev list. */ - CU_ASSERT(TAILQ_EMPTY(&bdev_base.vbdevs)); + CU_ASSERT(bdev_base.vbdevs_cnt == 0); spdk_bdev_part_base_free(base); spdk_bdev_unregister(&bdev_base, NULL, NULL);