bdev/zone: Write handling

Added handling of write commands, including zone state changes and write
pointer updates.

Signed-off-by: Mateusz Kozlowski <mateusz.kozlowski@intel.com>
Change-Id: I576ca18b52474bb299c20296a7561f25e5afa85b
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/468037
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
This commit is contained in:
Mateusz Kozlowski 2019-08-23 12:55:52 +02:00 committed by Tomasz Zawadzki
parent d530c103ab
commit ab1641eb30
2 changed files with 278 additions and 15 deletions

View File

@ -185,17 +185,23 @@ zone_block_destruct(void *ctx)
} }
static struct block_zone * static struct block_zone *
zone_block_get_zone_by_slba(struct bdev_zone_block *bdev_node, uint64_t start_lba) zone_block_get_zone_containing_lba(struct bdev_zone_block *bdev_node, uint64_t lba)
{ {
struct block_zone *zone = NULL; size_t index = lba >> bdev_node->zone_shift;
size_t index = start_lba >> bdev_node->zone_shift;
if (index >= bdev_node->num_zones) { if (index >= bdev_node->num_zones) {
return NULL; return NULL;
} }
zone = &bdev_node->zones[index]; return &bdev_node->zones[index];
if (zone->zone_info.zone_id == start_lba) { }
static struct block_zone *
zone_block_get_zone_by_slba(struct bdev_zone_block *bdev_node, uint64_t start_lba)
{
struct block_zone *zone = zone_block_get_zone_containing_lba(bdev_node, start_lba);
if (zone && zone->zone_info.zone_id == start_lba) {
return zone; return zone;
} else { } else {
return NULL; return NULL;
@ -304,6 +310,91 @@ zone_block_zone_management(struct bdev_zone_block *bdev_node, struct zone_block_
} }
} }
/*
 * Completion callback for the write that zone_block_write() forwards to the
 * base bdev.
 *
 * bdev_io - the IO that was issued to the base bdev
 * success - true when that IO completed successfully
 * cb_arg  - the original IO submitted to the zoned bdev
 */
static void
_zone_block_complete_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *orig_io = cb_arg;
	int status;

	if (success) {
		status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	/* Finish the original IO first, then release the IO that was created
	 * for the base bdev when the request was submitted.
	 */
	spdk_bdev_io_complete(orig_io, status);
	spdk_bdev_free_io(bdev_io);
}
/*
 * Handle a write request submitted to the zoned bdev.
 *
 * The write is accepted only if:
 *  - its first block belongs to an existing zone,
 *  - that zone is in the EMPTY, OPEN or CLOSED state,
 *  - it starts exactly at the zone's write pointer, and
 *  - it fits into the capacity remaining in the zone.
 *
 * Once all checks pass, the zone is marked OPEN (or FULL when the write
 * pointer reaches the end of the zone's capacity), the write pointer is
 * advanced, and the write is forwarded to the base bdev. Zone state is left
 * untouched when any of the checks fails.
 *
 * bdev_node - zoned bdev the request targets
 * ch        - channel providing the base bdev IO channel
 * bdev_io   - the write request
 *
 * Returns -EINVAL when the request is rejected, otherwise the return value
 * of the submission to the base bdev.
 */
static int
zone_block_write(struct bdev_zone_block *bdev_node, struct zone_block_io_channel *ch,
		 struct spdk_bdev_io *bdev_io)
{
	struct block_zone *zone;
	uint64_t len = bdev_io->u.bdev.num_blocks;
	uint64_t lba = bdev_io->u.bdev.offset_blocks;
	uint64_t num_blocks_left, wp;
	int rc = 0;

	zone = zone_block_get_zone_containing_lba(bdev_node, lba);
	if (!zone) {
		SPDK_ERRLOG("Trying to write to invalid zone (lba 0x%" PRIx64 ")\n", lba);
		return -EINVAL;
	}

	pthread_spin_lock(&zone->lock);

	/* Only validate the state here; the transition to OPEN happens after
	 * every check has passed, so a rejected write does not open the zone.
	 */
	switch (zone->zone_info.state) {
	case SPDK_BDEV_ZONE_STATE_OPEN:
	case SPDK_BDEV_ZONE_STATE_EMPTY:
	case SPDK_BDEV_ZONE_STATE_CLOSED:
		break;
	default:
		SPDK_ERRLOG("Trying to write to zone in invalid state %u\n", zone->zone_info.state);
		rc = -EINVAL;
		goto write_fail;
	}

	/* Writes must be sequential: they have to start at the write pointer. */
	wp = zone->zone_info.write_pointer;
	if (lba != wp) {
		SPDK_ERRLOG("Trying to write to zone with invalid address (lba 0x%" PRIx64 ", wp 0x%" PRIx64 ")\n",
			    lba, wp);
		rc = -EINVAL;
		goto write_fail;
	}

	/* A single write may not cross the end of the zone's capacity. */
	num_blocks_left = zone->zone_info.zone_id + zone->zone_info.capacity - wp;
	if (len > num_blocks_left) {
		SPDK_ERRLOG("Write exceeds zone capacity (lba 0x%" PRIx64 ", len 0x%" PRIx64 ", wp 0x%" PRIx64
			    ")\n", lba, len, wp);
		rc = -EINVAL;
		goto write_fail;
	}

	zone->zone_info.state = SPDK_BDEV_ZONE_STATE_OPEN;
	zone->zone_info.write_pointer += len;
	assert(zone->zone_info.write_pointer <= zone->zone_info.zone_id + zone->zone_info.capacity);
	if (zone->zone_info.write_pointer == zone->zone_info.zone_id + zone->zone_info.capacity) {
		zone->zone_info.state = SPDK_BDEV_ZONE_STATE_FULL;
	}
	pthread_spin_unlock(&zone->lock);

	/* NOTE(review): if the submission below returns an error, the write
	 * pointer and state updated above are not rolled back.
	 */
	if (bdev_io->u.bdev.md_buf == NULL) {
		rc = spdk_bdev_writev_blocks(bdev_node->base_desc, ch->base_ch, bdev_io->u.bdev.iovs,
					     bdev_io->u.bdev.iovcnt, lba,
					     bdev_io->u.bdev.num_blocks, _zone_block_complete_write,
					     bdev_io);
	} else {
		rc = spdk_bdev_writev_blocks_with_md(bdev_node->base_desc, ch->base_ch,
						     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						     bdev_io->u.bdev.md_buf,
						     lba, bdev_io->u.bdev.num_blocks,
						     _zone_block_complete_write, bdev_io);
	}

	return rc;

write_fail:
	pthread_spin_unlock(&zone->lock);
	return rc;
}
static void static void
zone_block_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) zone_block_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{ {
@ -318,6 +409,9 @@ zone_block_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_
case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
rc = zone_block_zone_management(bdev_node, dev_ch, bdev_io); rc = zone_block_zone_management(bdev_node, dev_ch, bdev_io);
break; break;
case SPDK_BDEV_IO_TYPE_WRITE:
rc = zone_block_write(bdev_node, dev_ch, bdev_io);
break;
default: default:
SPDK_ERRLOG("vbdev_block: unknown I/O type %u\n", bdev_io->type); SPDK_ERRLOG("vbdev_block: unknown I/O type %u\n", bdev_io->type);
rc = -ENOTSUP; rc = -ENOTSUP;
@ -340,6 +434,7 @@ zone_block_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{ {
switch (io_type) { switch (io_type) {
case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT: case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
case SPDK_BDEV_IO_TYPE_WRITE:
return true; return true;
default: default:
return false; return false;

View File

@ -44,6 +44,7 @@
#define BLOCK_SIZE 4096 #define BLOCK_SIZE 4096
/* Globals */ /* Globals */
uint64_t g_block_cnt;
struct io_output *g_io_output = NULL; struct io_output *g_io_output = NULL;
uint32_t g_max_io_size; uint32_t g_max_io_size;
uint32_t g_io_output_index; uint32_t g_io_output_index;
@ -93,11 +94,12 @@ set_test_opts(void)
} }
static void static void
init_test_globals(void) init_test_globals(uint64_t block_cnt)
{ {
g_io_output = calloc(g_max_io_size, sizeof(struct io_output)); g_io_output = calloc(g_max_io_size, sizeof(struct io_output));
SPDK_CU_ASSERT_FATAL(g_io_output != NULL); SPDK_CU_ASSERT_FATAL(g_io_output != NULL);
g_io_output_index = 0; g_io_output_index = 0;
g_block_cnt = block_cnt;
} }
static void static void
@ -248,7 +250,7 @@ create_nvme_bdev(void)
base_bdev->name = strdup(name); base_bdev->name = strdup(name);
SPDK_CU_ASSERT_FATAL(base_bdev->name != NULL); SPDK_CU_ASSERT_FATAL(base_bdev->name != NULL);
base_bdev->blocklen = BLOCK_SIZE; base_bdev->blocklen = BLOCK_SIZE;
base_bdev->blockcnt = BLOCK_CNT; base_bdev->blockcnt = g_block_cnt;
base_bdev->write_unit_size = 1; base_bdev->write_unit_size = 1;
TAILQ_INSERT_TAIL(&g_bdev_list, base_bdev, internal.link); TAILQ_INSERT_TAIL(&g_bdev_list, base_bdev, internal.link);
@ -335,6 +337,47 @@ spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
return 0; return 0;
} }
/*
 * Test stub for spdk_bdev_writev_blocks_with_md().
 *
 * Records the request parameters in the next free g_io_output slot, builds a
 * child IO describing the write, and immediately invokes the completion
 * callback with success. Always returns 0.
 */
int
spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
				struct iovec *iov, int iovcnt, void *md,
				uint64_t offset_blocks, uint64_t num_blocks,
				spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct io_output *output;
	struct spdk_bdev_io *child_io;

	/* Check the slot index before forming a pointer into g_io_output. */
	SPDK_CU_ASSERT_FATAL(g_io_output_index < g_max_io_size);
	output = &g_io_output[g_io_output_index];

	set_io_output(output, desc, ch, offset_blocks, num_blocks, cb, cb_arg,
		      SPDK_BDEV_IO_TYPE_WRITE);
	g_io_output_index++;

	child_io = calloc(1, sizeof(struct spdk_bdev_io));
	SPDK_CU_ASSERT_FATAL(child_io != NULL);
	child_io->internal.desc = desc;
	child_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	child_io->u.bdev.iovs = iov;
	child_io->u.bdev.iovcnt = iovcnt;
	child_io->u.bdev.md_buf = md;
	child_io->u.bdev.num_blocks = num_blocks;
	child_io->u.bdev.offset_blocks = offset_blocks;

	/* child_io is handed to the callback and not freed here; presumably
	 * the callback releases it via spdk_bdev_free_io() - confirm.
	 */
	cb(child_io, true, cb_arg);

	return 0;
}
/*
 * Test stub for spdk_bdev_writev_blocks(): a write without a metadata buffer
 * is handled as the with-md variant called with a NULL metadata pointer.
 */
int
spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			struct iovec *iov, int iovcnt,
			uint64_t offset_blocks, uint64_t num_blocks,
			spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	void *no_md = NULL;
	int rc;

	rc = spdk_bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, no_md,
					     offset_blocks, num_blocks, cb, cb_arg);
	return rc;
}
static void static void
verify_config_present(const char *name, bool presence) verify_config_present(const char *name, bool presence)
{ {
@ -480,7 +523,7 @@ verify_zone_bdev(bool presence)
} }
expected_optimal_open_zones = spdk_max(r->optimal_open_zones, 1); expected_optimal_open_zones = spdk_max(r->optimal_open_zones, 1);
expected_num_zones = BLOCK_CNT / spdk_align64pow2(r->zone_capacity) / expected_optimal_open_zones; expected_num_zones = g_block_cnt / spdk_align64pow2(r->zone_capacity) / expected_optimal_open_zones;
expected_num_zones *= expected_optimal_open_zones; expected_num_zones *= expected_optimal_open_zones;
CU_ASSERT(bdev->num_zones == expected_num_zones); CU_ASSERT(bdev->num_zones == expected_num_zones);
@ -551,6 +594,7 @@ test_zone_block_create(void)
size_t num_zones = 16; size_t num_zones = 16;
size_t zone_capacity = BLOCK_CNT / num_zones; size_t zone_capacity = BLOCK_CNT / num_zones;
init_test_globals(BLOCK_CNT);
CU_ASSERT(zone_block_init() == 0); CU_ASSERT(zone_block_init() == 0);
/* Create zoned virtual device before nvme device */ /* Create zoned virtual device before nvme device */
@ -585,6 +629,7 @@ test_zone_block_create_invalid(void)
size_t num_zones = 8; size_t num_zones = 8;
size_t zone_capacity = BLOCK_CNT / num_zones; size_t zone_capacity = BLOCK_CNT / num_zones;
init_test_globals(BLOCK_CNT);
CU_ASSERT(zone_block_init() == 0); CU_ASSERT(zone_block_init() == 0);
/* Create zoned virtual device and verify its correctness */ /* Create zoned virtual device and verify its correctness */
@ -645,11 +690,38 @@ bdev_io_zone_cleanup(struct spdk_bdev_io *bdev_io)
free(bdev_io); free(bdev_io);
} }
/*
 * Fill in a zero-initialized test IO.
 *
 * Sets the target bdev, block range and IO type. For every type other than
 * UNMAP and FLUSH, a single zeroed payload buffer of blocks * BLOCK_SIZE
 * bytes is allocated and attached through the IO's embedded iovec.
 */
static void
bdev_io_initialize(struct spdk_bdev_io *bdev_io, struct spdk_bdev *bdev,
		   uint64_t lba, uint64_t blocks, int16_t iotype)
{
	struct iovec *payload;
	size_t payload_len;

	bdev_io->bdev = bdev;
	bdev_io->u.bdev.offset_blocks = lba;
	bdev_io->u.bdev.num_blocks = blocks;
	bdev_io->type = iotype;

	/* UNMAP and FLUSH are left without a data buffer. */
	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_UNMAP:
	case SPDK_BDEV_IO_TYPE_FLUSH:
		return;
	default:
		break;
	}

	payload = &bdev_io->iov;
	payload_len = bdev_io->u.bdev.num_blocks * BLOCK_SIZE;

	bdev_io->u.bdev.iovcnt = 1;
	bdev_io->u.bdev.iovs = payload;
	payload->iov_base = calloc(1, payload_len);
	SPDK_CU_ASSERT_FATAL(payload->iov_base != NULL);
	payload->iov_len = payload_len;
}
/*
 * Release a test IO: frees the buffer referenced by the IO's embedded iovec
 * and then the IO structure itself.
 */
static void
bdev_io_cleanup(struct spdk_bdev_io *bdev_io)
{
	void *payload = bdev_io->iov.iov_base;

	free(payload);
	free(bdev_io);
}
static struct bdev_zone_block * static struct bdev_zone_block *
create_and_get_vbdev(char *vdev_name, char *name, uint64_t num_zones, uint64_t optimal_open_zones, create_and_get_vbdev(char *vdev_name, char *name, uint64_t num_zones, uint64_t optimal_open_zones,
bool create_bdev) bool create_bdev)
{ {
size_t zone_size = BLOCK_CNT / num_zones; size_t zone_size = g_block_cnt / num_zones;
struct bdev_zone_block *bdev = NULL; struct bdev_zone_block *bdev = NULL;
send_create_vbdev(vdev_name, name, zone_size, optimal_open_zones, create_bdev, true); send_create_vbdev(vdev_name, name, zone_size, optimal_open_zones, create_bdev, true);
@ -671,7 +743,7 @@ test_supported_io_types(void)
char *name = "Nvme0n1"; char *name = "Nvme0n1";
uint32_t num_zones = 8; uint32_t num_zones = 8;
init_test_globals(); init_test_globals(BLOCK_CNT);
CU_ASSERT(zone_block_init() == 0); CU_ASSERT(zone_block_init() == 0);
/* Create zone dev */ /* Create zone dev */
@ -680,7 +752,7 @@ test_supported_io_types(void)
CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT) == true); CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT) == true);
CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZONE_APPEND) == false); CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZONE_APPEND) == false);
CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_READ) == false); CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_READ) == false);
CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE) == false); CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE) == true);
CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN) == false); CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN) == false);
CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO) == false); CU_ASSERT(zone_block_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO) == false);
@ -735,7 +807,7 @@ test_get_zone_info(void)
uint32_t num_zones = 8, i; uint32_t num_zones = 8, i;
struct spdk_bdev_zone_info *info; struct spdk_bdev_zone_info *info;
init_test_globals(); init_test_globals(BLOCK_CNT);
CU_ASSERT(zone_block_init() == 0); CU_ASSERT(zone_block_init() == 0);
/* Create zone dev */ /* Create zone dev */
@ -847,7 +919,7 @@ test_reset_zone(void)
uint64_t zone_id; uint64_t zone_id;
uint32_t output_index = 0; uint32_t output_index = 0;
init_test_globals(); init_test_globals(BLOCK_CNT);
CU_ASSERT(zone_block_init() == 0); CU_ASSERT(zone_block_init() == 0);
/* Create zone dev */ /* Create zone dev */
@ -888,6 +960,25 @@ test_reset_zone(void)
test_cleanup(); test_cleanup();
} }
/*
 * Submit a single write to the zoned bdev and check its completion status.
 *
 * bdev         - zoned bdev under test
 * ch           - IO channel passed to zone_block_submit_request()
 * lba          - first block of the write
 * blocks       - number of blocks to write
 * output_index - value g_io_output_index is reset to before submitting
 * success      - expected value of g_io_comp_status after the submission
 */
static void
send_write_zone(struct bdev_zone_block *bdev, struct spdk_io_channel *ch, uint64_t lba,
		uint64_t blocks, uint32_t output_index, bool success)
{
	const size_t io_size = sizeof(struct spdk_bdev_io) + sizeof(struct zone_block_io);
	struct spdk_bdev_io *write_io = calloc(1, io_size);

	SPDK_CU_ASSERT_FATAL(write_io != NULL);
	bdev_io_initialize(write_io, &bdev->bdev, lba, blocks, SPDK_BDEV_IO_TYPE_WRITE);

	/* Clear everything recorded by earlier requests. */
	memset(g_io_output, 0, (g_max_io_size * sizeof(struct io_output)));
	g_io_output_index = output_index;

	/* Preset the status to the opposite of the expected outcome; the
	 * assertion below only passes if submitting the request changes it.
	 */
	g_io_comp_status = !success;
	zone_block_submit_request(ch, write_io);
	CU_ASSERT(g_io_comp_status == success);

	bdev_io_cleanup(write_io);
}
static void static void
test_open_zone(void) test_open_zone(void)
{ {
@ -898,7 +989,7 @@ test_open_zone(void)
uint64_t zone_id; uint64_t zone_id;
uint32_t output_index = 0, i; uint32_t output_index = 0, i;
init_test_globals(); init_test_globals(BLOCK_CNT);
CU_ASSERT(zone_block_init() == 0); CU_ASSERT(zone_block_init() == 0);
/* Create zone dev */ /* Create zone dev */
@ -955,6 +1046,82 @@ test_open_zone(void)
test_cleanup(); test_cleanup();
} }
/*
 * Exercise write handling of the zoned bdev: rejected writes (zone not
 * writable, LBA outside the device, wrong write pointer, write spanning two
 * zones) as well as accepted writes and the resulting write pointer and zone
 * state changes.
 */
static void
test_zone_write(void)
{
	struct spdk_io_channel *io_ch;
	struct bdev_zone_block *zdev;
	char *base_name = "Nvme0n1";
	uint32_t zone_cnt = 20;
	uint64_t zone_size, zone_start, start_lba, chunk;
	uint32_t out_idx = 0, i;

	init_test_globals(20 * 1024ul);
	CU_ASSERT(zone_block_init() == 0);

	/* Create the zoned device on top of the base bdev */
	zdev = create_and_get_vbdev("zone_dev1", base_name, zone_cnt, 1, true);
	zone_size = zdev->bdev.zone_size;

	io_ch = calloc(1, sizeof(struct spdk_io_channel) + sizeof(struct zone_block_io_channel));
	SPDK_CU_ASSERT_FATAL(io_ch != NULL);

	/* Zone 0 has not been reset yet and is expected to be full: write must fail */
	start_lba = 0;
	send_write_zone(zdev, io_ch, start_lba, 1, out_idx, false);

	/* First LBA past the end of the device: write must fail */
	start_lba = g_block_cnt;
	send_write_zone(zdev, io_ch, start_lba, 1, out_idx, false);

	/* Reset zone 0, write a single block and expect it to be open with wp == 1 */
	start_lba = 0;
	send_reset_zone(zdev, io_ch, start_lba, out_idx, true);
	send_write_zone(zdev, io_ch, start_lba, 1, out_idx, true);
	send_zone_info(zdev, io_ch, start_lba, 1, SPDK_BDEV_ZONE_STATE_OPEN, out_idx, true);

	/* Same for the second zone, this time writing 5 blocks */
	start_lba = zone_size;
	send_reset_zone(zdev, io_ch, start_lba, out_idx, true);
	send_write_zone(zdev, io_ch, start_lba, 5, out_idx, true);
	send_zone_info(zdev, io_ch, start_lba, start_lba + 5, SPDK_BDEV_ZONE_STATE_OPEN, out_idx, true);

	/* Fill the rest of zone 0: 15 blocks to reach a 16-block boundary,
	 * then 16-block writes up to the end; the zone must end up full.
	 */
	chunk = 15;
	send_write_zone(zdev, io_ch, 1, chunk, out_idx, true);
	chunk = 16;
	i = chunk;
	while (i < zone_size) {
		send_write_zone(zdev, io_ch, i, chunk, out_idx, true);
		i += chunk;
	}
	send_zone_info(zdev, io_ch, 0, zone_size, SPDK_BDEV_ZONE_STATE_FULL, out_idx,
		       true);

	/* Second zone: a write that is ahead of the write pointer must fail */
	start_lba = zone_size;
	send_write_zone(zdev, io_ch, start_lba + 7, 1, out_idx, false);
	/* ... and so must a write to blocks that were already written */
	send_write_zone(zdev, io_ch, start_lba, 1, out_idx, false);

	/* Reset every zone and check that each one reports empty */
	for (i = 0; i < zone_cnt; i++) {
		zone_start = i * zone_size;
		send_reset_zone(zdev, io_ch, zone_start, out_idx, true);
		send_zone_info(zdev, io_ch, zone_start, zone_start, SPDK_BDEV_ZONE_STATE_EMPTY, out_idx, true);
	}

	/* Fill zone 0 except for its last 16 blocks, then attempt a 32-block
	 * write that would spill into the next zone: it must fail.
	 */
	chunk = 16;
	i = 0;
	while (i < zone_size - chunk) {
		send_write_zone(zdev, io_ch, i, chunk, out_idx, true);
		i += chunk;
	}
	send_write_zone(zdev, io_ch, zone_size - chunk, 32, out_idx, false);

	/* Delete the zoned device and drain the thread's pending work */
	send_delete_vbdev("zone_dev1", true);
	while (spdk_thread_poll(g_thread, 0, 0) > 0) {}
	free(io_ch);

	test_cleanup();
}
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
CU_pSuite suite = NULL; CU_pSuite suite = NULL;
@ -976,7 +1143,8 @@ int main(int argc, char **argv)
CU_add_test(suite, "test_get_zone_info", test_get_zone_info) == NULL || CU_add_test(suite, "test_get_zone_info", test_get_zone_info) == NULL ||
CU_add_test(suite, "test_supported_io_types", test_supported_io_types) == NULL || CU_add_test(suite, "test_supported_io_types", test_supported_io_types) == NULL ||
CU_add_test(suite, "test_reset_zone", test_reset_zone) == NULL || CU_add_test(suite, "test_reset_zone", test_reset_zone) == NULL ||
CU_add_test(suite, "test_open_zone", test_open_zone) == NULL CU_add_test(suite, "test_open_zone", test_open_zone) == NULL ||
CU_add_test(suite, "test_zone_write", test_zone_write) == NULL
) { ) {
CU_cleanup_registry(); CU_cleanup_registry();
return CU_get_error(); return CU_get_error();