bdev: replace tailq by arrays in base and vbdev linking

SPKD base bdev might be part of multiple vbdevs. The same is true in
reverse direction. So consider folowing scenario:

  bdev3  bdev4  bdev5
     |     |     |
   +-+--+  +  +--+--+
  /      \ | /       \
bdev0    bdev1      bdev2

In current implementation bdev0/1/2 will apear as base base for
bdev3/4/5 which is obviously wrong.
This patch try to address this issue.

Change-Id: Ic99c13c8656ceb597aba7e41ccb2fa8090b4f13b
Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
Reviewed-on: https://review.gerrithub.io/405104
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Dariusz Stojaczyk <dariuszx.stojaczyk@intel.com>
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
This commit is contained in:
Pawel Wodkowski 2018-03-23 20:35:21 +01:00 committed by Jim Harris
parent 3d220b925f
commit 4473942f46
4 changed files with 266 additions and 81 deletions

View File

@ -282,15 +282,14 @@ struct spdk_bdev {
/** The bdev status */
enum spdk_bdev_status status;
/** The list of block devices that this block device is built on top of (if any). */
TAILQ_HEAD(, spdk_bdev) base_bdevs;
/** The array of block devices that this block device is built on top of (if any). */
struct spdk_bdev **base_bdevs;
size_t base_bdevs_cnt;
TAILQ_ENTRY(spdk_bdev) base_bdev_link;
/** The list of virtual block devices built on top of this block device. */
TAILQ_HEAD(, spdk_bdev) vbdevs;
TAILQ_ENTRY(spdk_bdev) vbdev_link;
/** The array of virtual block devices built on top of this block device. */
struct spdk_bdev **vbdevs;
size_t vbdevs_cnt;
/**
* Pointer to the module that has claimed this bdev for purposes of creating virtual

View File

@ -2350,10 +2350,8 @@ spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io)
}
static int
_spdk_bdev_register(struct spdk_bdev *bdev)
spdk_bdev_init(struct spdk_bdev *bdev)
{
struct spdk_bdev_module *module;
assert(bdev->module != NULL);
if (!bdev->name) {
@ -2370,9 +2368,6 @@ _spdk_bdev_register(struct spdk_bdev *bdev)
TAILQ_INIT(&bdev->open_descs);
TAILQ_INIT(&bdev->vbdevs);
TAILQ_INIT(&bdev->base_bdevs);
TAILQ_INIT(&bdev->aliases);
bdev->reset_in_progress = NULL;
@ -2382,6 +2377,22 @@ _spdk_bdev_register(struct spdk_bdev *bdev)
sizeof(struct spdk_bdev_channel));
pthread_mutex_init(&bdev->mutex, NULL);
return 0;
}
static void
spdk_bdev_fini(struct spdk_bdev *bdev)
{
pthread_mutex_destroy(&bdev->mutex);
spdk_io_device_unregister(__bdev_to_io_dev(bdev), NULL);
}
static void
spdk_bdev_start(struct spdk_bdev *bdev)
{
struct spdk_bdev_module *module;
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Inserting bdev %s into list\n", bdev->name);
TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, link);
@ -2391,34 +2402,132 @@ _spdk_bdev_register(struct spdk_bdev *bdev)
module->examine(bdev);
}
}
return 0;
}
int
spdk_bdev_register(struct spdk_bdev *bdev)
{
return _spdk_bdev_register(bdev);
int rc = spdk_bdev_init(bdev);
if (rc == 0) {
spdk_bdev_start(bdev);
}
return rc;
}
static void
spdk_vbdev_remove_base_bdevs(struct spdk_bdev *vbdev)
{
struct spdk_bdev **bdevs;
struct spdk_bdev *base;
size_t i, j, k;
bool found;
/* Iterate over base bdevs to remove vbdev from them. */
for (i = 0; i < vbdev->base_bdevs_cnt; i++) {
found = false;
base = vbdev->base_bdevs[i];
for (j = 0; j < base->vbdevs_cnt; j++) {
if (base->vbdevs[j] != vbdev) {
continue;
}
for (k = j; k + 1 < base->vbdevs_cnt; k++) {
base->vbdevs[k] = base->vbdevs[k + 1];
}
base->vbdevs_cnt--;
if (base->vbdevs_cnt > 0) {
bdevs = realloc(base->vbdevs, base->vbdevs_cnt * sizeof(bdevs[0]));
/* It would be odd if shrinking memory block fail. */
assert(bdevs);
base->vbdevs = bdevs;
} else {
free(base->vbdevs);
base->vbdevs = NULL;
}
found = true;
break;
}
if (!found) {
SPDK_WARNLOG("Bdev '%s' is not base bdev of '%s'.\n", base->name, vbdev->name);
}
}
free(vbdev->base_bdevs);
vbdev->base_bdevs = NULL;
vbdev->base_bdevs_cnt = 0;
}
static int
spdk_vbdev_set_base_bdevs(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, size_t cnt)
{
struct spdk_bdev **vbdevs;
struct spdk_bdev *base;
size_t i;
/* Adding base bdevs isn't supported (yet?). */
assert(vbdev->base_bdevs_cnt == 0);
vbdev->base_bdevs = malloc(cnt * sizeof(vbdev->base_bdevs[0]));
if (!vbdev->base_bdevs) {
SPDK_ERRLOG("%s - realloc() failed\n", vbdev->name);
return -ENOMEM;
}
memcpy(vbdev->base_bdevs, base_bdevs, cnt * sizeof(vbdev->base_bdevs[0]));
vbdev->base_bdevs_cnt = cnt;
/* Iterate over base bdevs to add this vbdev to them. */
for (i = 0; i < cnt; i++) {
base = vbdev->base_bdevs[i];
assert(base != NULL);
assert(base->claim_module != NULL);
vbdevs = realloc(base->vbdevs, (base->vbdevs_cnt + 1) * sizeof(vbdevs[0]));
if (!vbdevs) {
SPDK_ERRLOG("%s - realloc() failed\n", base->name);
spdk_vbdev_remove_base_bdevs(vbdev);
return -ENOMEM;
}
vbdevs[base->vbdevs_cnt] = vbdev;
base->vbdevs = vbdevs;
base->vbdevs_cnt++;
}
return 0;
}
int
spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count)
{
int i, rc;
int rc;
rc = _spdk_bdev_register(vbdev);
rc = spdk_bdev_init(vbdev);
if (rc) {
return rc;
}
for (i = 0; i < base_bdev_count; i++) {
assert(base_bdevs[i] != NULL);
assert(base_bdevs[i]->claim_module != NULL);
TAILQ_INSERT_TAIL(&vbdev->base_bdevs, base_bdevs[i], base_bdev_link);
TAILQ_INSERT_TAIL(&base_bdevs[i]->vbdevs, vbdev, vbdev_link);
if (base_bdev_count == 0) {
spdk_bdev_start(vbdev);
return 0;
}
rc = spdk_vbdev_set_base_bdevs(vbdev, base_bdevs, base_bdev_count);
if (rc) {
spdk_bdev_fini(vbdev);
return rc;
}
spdk_bdev_start(vbdev);
return 0;
}
void
@ -2443,17 +2552,12 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void
struct spdk_bdev_desc *desc, *tmp;
int rc;
bool do_destruct = true;
struct spdk_bdev *base_bdev;
SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Removing bdev %s from list\n", bdev->name);
pthread_mutex_lock(&bdev->mutex);
if (!TAILQ_EMPTY(&bdev->base_bdevs)) {
TAILQ_FOREACH(base_bdev, &bdev->base_bdevs, base_bdev_link) {
TAILQ_REMOVE(&base_bdev->vbdevs, bdev, vbdev_link);
}
}
spdk_vbdev_remove_base_bdevs(bdev);
bdev->status = SPDK_BDEV_STATUS_REMOVING;
bdev->unregister_cb = cb_fn;
@ -2480,9 +2584,7 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void
TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link);
pthread_mutex_unlock(&bdev->mutex);
pthread_mutex_destroy(&bdev->mutex);
spdk_io_device_unregister(__bdev_to_io_dev(bdev), NULL);
spdk_bdev_fini(bdev);
rc = bdev->fn_table->destruct(bdev->ctxt);
if (rc < 0) {

View File

@ -89,6 +89,45 @@ vbdev_ut_examine(struct spdk_bdev *bdev)
spdk_bdev_module_examine_done(&vbdev_ut_if);
}
static bool
is_vbdev(struct spdk_bdev *base, struct spdk_bdev *vbdev)
{
size_t i;
int found = 0;
for (i = 0; i < base->vbdevs_cnt; i++) {
found += base->vbdevs[i] == vbdev;
}
CU_ASSERT(found <= 1);
return !!found;
}
static bool
is_base_bdev(struct spdk_bdev *base, struct spdk_bdev *vbdev)
{
size_t i;
int found = 0;
for (i = 0; i < vbdev->base_bdevs_cnt; i++) {
found += vbdev->base_bdevs[i] == base;
}
CU_ASSERT(found <= 1);
return !!found;
}
static bool
check_base_and_vbdev(struct spdk_bdev *base, struct spdk_bdev *vbdev)
{
bool _is_vbdev = is_vbdev(base, vbdev);
bool _is_base = is_base_bdev(base, vbdev);
CU_ASSERT(_is_vbdev == _is_base);
return _is_base && _is_vbdev;
}
static struct spdk_bdev *
allocate_bdev(char *name)
{
@ -104,8 +143,8 @@ allocate_bdev(char *name)
rc = spdk_bdev_register(bdev);
CU_ASSERT(rc == 0);
CU_ASSERT(TAILQ_EMPTY(&bdev->base_bdevs));
CU_ASSERT(TAILQ_EMPTY(&bdev->vbdevs));
CU_ASSERT(bdev->base_bdevs_cnt == 0);
CU_ASSERT(bdev->vbdevs_cnt == 0);
return bdev;
}
@ -132,8 +171,14 @@ allocate_vbdev(char *name, struct spdk_bdev *base1, struct spdk_bdev *base2)
rc = spdk_vbdev_register(bdev, array, base2 == NULL ? 1 : 2);
CU_ASSERT(rc == 0);
CU_ASSERT(!TAILQ_EMPTY(&bdev->base_bdevs));
CU_ASSERT(TAILQ_EMPTY(&bdev->vbdevs));
CU_ASSERT(bdev->base_bdevs_cnt > 0);
CU_ASSERT(bdev->vbdevs_cnt == 0);
CU_ASSERT(check_base_and_vbdev(base1, bdev) == true);
if (base2) {
CU_ASSERT(check_base_and_vbdev(base2, bdev) == true);
}
return bdev;
}
@ -142,48 +187,55 @@ static void
free_bdev(struct spdk_bdev *bdev)
{
spdk_bdev_unregister(bdev, NULL, NULL);
memset(bdev, 0xFF, sizeof(*bdev));
free(bdev);
}
static void
free_vbdev(struct spdk_bdev *bdev)
{
CU_ASSERT(!TAILQ_EMPTY(&bdev->base_bdevs));
CU_ASSERT(bdev->base_bdevs_cnt != 0);
spdk_bdev_unregister(bdev, NULL, NULL);
memset(bdev, 0xFF, sizeof(*bdev));
free(bdev);
}
static void
open_write_test(void)
{
struct spdk_bdev *bdev[8];
struct spdk_bdev_desc *desc[8] = {};
struct spdk_bdev *bdev[9];
struct spdk_bdev_desc *desc[9] = {};
int rc;
/*
* Create a tree of bdevs to test various open w/ write cases.
*
* bdev0 through bdev2 are physical block devices, such as NVMe
* bdev0 through bdev3 are physical block devices, such as NVMe
* namespaces or Ceph block devices.
*
* bdev3 is a virtual bdev with multiple base bdevs. This models
* bdev4 is a virtual bdev with multiple base bdevs. This models
* caching or RAID use cases.
*
* bdev4 through bdev6 are all virtual bdevs with the same base
* bdev. This models partitioning or logical volume use cases.
* bdev5 through bdev7 are all virtual bdevs with the same base
* bdev (except bdev7). This models partitioning or logical volume
* use cases.
*
* bdev7 is a virtual bdev with multiple base bdevs, but these
* bdev7 is a virtual bdev with multiple base bdevs. One of base bdevs
* (bdev2) is shared with other virtual bdevs: bdev5 and bdev6. This
* models caching, RAID, partitioning or logical volumes use cases.
*
* bdev8 is a virtual bdev with multiple base bdevs, but these
* base bdevs are themselves virtual bdevs.
*
* bdev7
* bdev8
* |
* +----------+
* | |
* bdev3 bdev4 bdev5 bdev6
* bdev4 bdev5 bdev6 bdev7
* | | | |
* +---+---+ +-------+-------+
* | | |
* bdev0 bdev1 bdev2
* +---+---+ +---+ + +---+---+
* | | \ | / \
* bdev0 bdev1 bdev2 bdev3
*/
bdev[0] = allocate_bdev("bdev0");
@ -198,18 +250,48 @@ open_write_test(void)
rc = spdk_bdev_module_claim_bdev(bdev[2], NULL, &bdev_ut_if);
CU_ASSERT(rc == 0);
bdev[3] = allocate_vbdev("bdev3", bdev[0], bdev[1]);
bdev[3] = allocate_bdev("bdev3");
rc = spdk_bdev_module_claim_bdev(bdev[3], NULL, &bdev_ut_if);
CU_ASSERT(rc == 0);
bdev[4] = allocate_vbdev("bdev4", bdev[2], NULL);
bdev[4] = allocate_vbdev("bdev4", bdev[0], bdev[1]);
rc = spdk_bdev_module_claim_bdev(bdev[4], NULL, &bdev_ut_if);
CU_ASSERT(rc == 0);
bdev[5] = allocate_vbdev("bdev5", bdev[2], NULL);
rc = spdk_bdev_module_claim_bdev(bdev[5], NULL, &bdev_ut_if);
CU_ASSERT(rc == 0);
bdev[6] = allocate_vbdev("bdev6", bdev[2], NULL);
bdev[7] = allocate_vbdev("bdev7", bdev[3], bdev[4]);
bdev[7] = allocate_vbdev("bdev7", bdev[2], bdev[3]);
bdev[8] = allocate_vbdev("bdev8", bdev[4], bdev[5]);
/* Check tree */
CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[4]) == true);
CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[5]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[6]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[7]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[8]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[4]) == true);
CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[5]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[6]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[7]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[8]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[4]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[5]) == true);
CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[6]) == true);
CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[7]) == true);
CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[8]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[4]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[5]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[6]) == false);
CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[7]) == true);
CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[8]) == false);
/* Open bdev0 read-only. This should succeed. */
rc = spdk_bdev_open(bdev[0], false, NULL, NULL, &desc[0]);
@ -225,29 +307,7 @@ open_write_test(void)
CU_ASSERT(rc == -EPERM);
/*
* Open bdev3 read/write. This should fail since bdev3 has been claimed
* by a vbdev module.
*/
rc = spdk_bdev_open(bdev[3], true, NULL, NULL, &desc[3]);
CU_ASSERT(rc == -EPERM);
/* Open bdev3 read-only. This should succeed. */
rc = spdk_bdev_open(bdev[3], false, NULL, NULL, &desc[3]);
CU_ASSERT(rc == 0);
CU_ASSERT(desc[3] != NULL);
spdk_bdev_close(desc[3]);
/*
* Open bdev7 read/write. This should succeed since it is a leaf
* bdev.
*/
rc = spdk_bdev_open(bdev[7], true, NULL, NULL, &desc[7]);
CU_ASSERT(rc == 0);
CU_ASSERT(desc[7] != NULL);
spdk_bdev_close(desc[7]);
/*
* Open bdev4 read/write. This should fail since bdev4 has been claimed
* Open bdev4 read/write. This should fail since bdev3 has been claimed
* by a vbdev module.
*/
rc = spdk_bdev_open(bdev[4], true, NULL, NULL, &desc[4]);
@ -259,17 +319,40 @@ open_write_test(void)
CU_ASSERT(desc[4] != NULL);
spdk_bdev_close(desc[4]);
free_vbdev(bdev[7]);
/*
* Open bdev8 read/write. This should succeed since it is a leaf
* bdev.
*/
rc = spdk_bdev_open(bdev[8], true, NULL, NULL, &desc[8]);
CU_ASSERT(rc == 0);
CU_ASSERT(desc[8] != NULL);
spdk_bdev_close(desc[8]);
/*
* Open bdev5 read/write. This should fail since bdev4 has been claimed
* by a vbdev module.
*/
rc = spdk_bdev_open(bdev[5], true, NULL, NULL, &desc[5]);
CU_ASSERT(rc == -EPERM);
/* Open bdev4 read-only. This should succeed. */
rc = spdk_bdev_open(bdev[5], false, NULL, NULL, &desc[5]);
CU_ASSERT(rc == 0);
CU_ASSERT(desc[5] != NULL);
spdk_bdev_close(desc[5]);
free_vbdev(bdev[8]);
free_vbdev(bdev[3]);
free_vbdev(bdev[4]);
free_vbdev(bdev[5]);
free_vbdev(bdev[6]);
free_vbdev(bdev[7]);
free_vbdev(bdev[4]);
free_bdev(bdev[0]);
free_bdev(bdev[1]);
free_bdev(bdev[2]);
free_bdev(bdev[3]);
}
static void

View File

@ -91,7 +91,8 @@ static void
part_test(void)
{
struct spdk_bdev_part_base *base;
struct spdk_bdev_part part1, part2;
struct spdk_bdev_part part1 = {};
struct spdk_bdev_part part2 = {};
struct spdk_bdev bdev_base = {};
SPDK_BDEV_PART_TAILQ tailq = TAILQ_HEAD_INITIALIZER(tailq);
int rc;
@ -116,7 +117,7 @@ part_test(void)
* The base device was removed - ensure that the partition vbdevs were
* removed from the base's vbdev list.
*/
CU_ASSERT(TAILQ_EMPTY(&bdev_base.vbdevs));
CU_ASSERT(bdev_base.vbdevs_cnt == 0);
spdk_bdev_part_base_free(base);
spdk_bdev_unregister(&bdev_base, NULL, NULL);