bdev/nvme: Retry I/O to the same path if error is I/O error
When an I/O gets an I/O error, the I/O path to which the I/O was submitted may be still available. In this case, the I/O should be retried to the same I/O path. However, a new I/O path was always selected for an I/O retry. For the active/passive policy, the same I/O path was selected naturally. However, for the active/active policy, it was very likely that a different I/O path was selected. To use the same I/O path for an I/O retry, add a helper function bdev_nvme_retry_io() into bdev_nvme_retry_ios() and replace bdev_nvme_submit_request() by bdev_nvme_retry_io(). bdev_nvme_retry_io() checks if nbdev_io->io_path is not NULL and is available. Then, call _bdev_nvme_submit_request() if true, or call bdev_nvme_submit_request() otherwise. For I/O path error, clear nbdev_io->io_path for clarification. Add unit test to verify this change. Linux kernel native NVMe multipath already takes this approach. Hence, this change will be reasonable. Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com> Change-Id: I7022aafd8b1cdd5830c4f743d64b080aa970cf8d Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16015 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com> Reviewed-by: Richael <richael.zhuang@arm.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
6fa7cf97d0
commit
ae620784bd
@ -145,6 +145,8 @@ static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
|
|||||||
struct nvme_async_probe_ctx *ctx);
|
struct nvme_async_probe_ctx *ctx);
|
||||||
static int bdev_nvme_library_init(void);
|
static int bdev_nvme_library_init(void);
|
||||||
static void bdev_nvme_library_fini(void);
|
static void bdev_nvme_library_fini(void);
|
||||||
|
static void _bdev_nvme_submit_request(struct nvme_bdev_channel *nbdev_ch,
|
||||||
|
struct spdk_bdev_io *bdev_io);
|
||||||
static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
|
static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
|
||||||
struct spdk_bdev_io *bdev_io);
|
struct spdk_bdev_io *bdev_io);
|
||||||
static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
|
static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
|
||||||
@ -943,11 +945,24 @@ any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
bdev_nvme_retry_io(struct nvme_bdev_channel *nbdev_ch, struct spdk_bdev_io *bdev_io)
|
||||||
|
{
|
||||||
|
struct nvme_bdev_io *nbdev_io = (struct nvme_bdev_io *)bdev_io->driver_ctx;
|
||||||
|
struct spdk_io_channel *ch;
|
||||||
|
|
||||||
|
if (nbdev_io->io_path != NULL && nvme_io_path_is_available(nbdev_io->io_path)) {
|
||||||
|
_bdev_nvme_submit_request(nbdev_ch, bdev_io);
|
||||||
|
} else {
|
||||||
|
ch = spdk_io_channel_from_ctx(nbdev_ch);
|
||||||
|
bdev_nvme_submit_request(ch, bdev_io);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
bdev_nvme_retry_ios(void *arg)
|
bdev_nvme_retry_ios(void *arg)
|
||||||
{
|
{
|
||||||
struct nvme_bdev_channel *nbdev_ch = arg;
|
struct nvme_bdev_channel *nbdev_ch = arg;
|
||||||
struct spdk_io_channel *ch = spdk_io_channel_from_ctx(nbdev_ch);
|
|
||||||
struct spdk_bdev_io *bdev_io, *tmp_bdev_io;
|
struct spdk_bdev_io *bdev_io, *tmp_bdev_io;
|
||||||
struct nvme_bdev_io *bio;
|
struct nvme_bdev_io *bio;
|
||||||
uint64_t now, delay_us;
|
uint64_t now, delay_us;
|
||||||
@ -962,7 +977,7 @@ bdev_nvme_retry_ios(void *arg)
|
|||||||
|
|
||||||
TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
|
TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
|
||||||
|
|
||||||
bdev_nvme_submit_request(ch, bdev_io);
|
bdev_nvme_retry_io(nbdev_ch, bdev_io);
|
||||||
}
|
}
|
||||||
|
|
||||||
spdk_poller_unregister(&nbdev_ch->retry_io_poller);
|
spdk_poller_unregister(&nbdev_ch->retry_io_poller);
|
||||||
@ -1112,11 +1127,15 @@ bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
|
|||||||
!nvme_io_path_is_available(io_path) ||
|
!nvme_io_path_is_available(io_path) ||
|
||||||
!nvme_ctrlr_is_available(nvme_ctrlr)) {
|
!nvme_ctrlr_is_available(nvme_ctrlr)) {
|
||||||
nbdev_ch->current_io_path = NULL;
|
nbdev_ch->current_io_path = NULL;
|
||||||
|
bio->io_path = NULL;
|
||||||
if (spdk_nvme_cpl_is_ana_error(cpl)) {
|
if (spdk_nvme_cpl_is_ana_error(cpl)) {
|
||||||
if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
|
if (nvme_ctrlr_read_ana_log_page(nvme_ctrlr) == 0) {
|
||||||
io_path->nvme_ns->ana_state_updating = true;
|
io_path->nvme_ns->ana_state_updating = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!any_io_path_may_become_available(nbdev_ch)) {
|
||||||
|
goto complete;
|
||||||
|
}
|
||||||
delay_ms = 0;
|
delay_ms = 0;
|
||||||
} else {
|
} else {
|
||||||
bio->retry_count++;
|
bio->retry_count++;
|
||||||
@ -1130,10 +1149,8 @@ bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (any_io_path_may_become_available(nbdev_ch)) {
|
|
||||||
bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
|
bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
|
|
||||||
complete:
|
complete:
|
||||||
bio->retry_count = 0;
|
bio->retry_count = 0;
|
||||||
@ -1158,6 +1175,7 @@ bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
|
|||||||
nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
|
nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
|
||||||
|
|
||||||
nbdev_ch->current_io_path = NULL;
|
nbdev_ch->current_io_path = NULL;
|
||||||
|
bio->io_path = NULL;
|
||||||
|
|
||||||
if (any_io_path_may_become_available(nbdev_ch)) {
|
if (any_io_path_may_become_available(nbdev_ch)) {
|
||||||
bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
|
bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
|
||||||
|
@ -6179,6 +6179,181 @@ test_uuid_generation(void)
|
|||||||
CU_ASSERT((spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &uuid1)) == 0);
|
CU_ASSERT((spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &uuid1)) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_retry_io_to_same_path(void)
|
||||||
|
{
|
||||||
|
struct nvme_path_id path1 = {}, path2 = {};
|
||||||
|
struct spdk_nvme_ctrlr *ctrlr1, *ctrlr2;
|
||||||
|
struct nvme_bdev_ctrlr *nbdev_ctrlr;
|
||||||
|
struct nvme_ctrlr *nvme_ctrlr1, *nvme_ctrlr2;
|
||||||
|
const int STRING_SIZE = 32;
|
||||||
|
const char *attached_names[STRING_SIZE];
|
||||||
|
struct nvme_bdev *bdev;
|
||||||
|
struct spdk_bdev_io *bdev_io;
|
||||||
|
struct nvme_bdev_io *bio;
|
||||||
|
struct spdk_io_channel *ch;
|
||||||
|
struct nvme_bdev_channel *nbdev_ch;
|
||||||
|
struct nvme_io_path *io_path1, *io_path2;
|
||||||
|
struct ut_nvme_req *req;
|
||||||
|
struct spdk_uuid uuid1 = { .u.raw = { 0x1 } };
|
||||||
|
int done;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
g_opts.nvme_ioq_poll_period_us = 1;
|
||||||
|
|
||||||
|
memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
|
||||||
|
ut_init_trid(&path1.trid);
|
||||||
|
ut_init_trid2(&path2.trid);
|
||||||
|
g_ut_attach_ctrlr_status = 0;
|
||||||
|
g_ut_attach_bdev_count = 1;
|
||||||
|
|
||||||
|
set_thread(0);
|
||||||
|
|
||||||
|
ctrlr1 = ut_attach_ctrlr(&path1.trid, 1, true, true);
|
||||||
|
SPDK_CU_ASSERT_FATAL(ctrlr1 != NULL);
|
||||||
|
|
||||||
|
ctrlr1->ns[0].uuid = &uuid1;
|
||||||
|
|
||||||
|
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE,
|
||||||
|
attach_ctrlr_done, NULL, NULL, NULL, true);
|
||||||
|
CU_ASSERT(rc == 0);
|
||||||
|
|
||||||
|
spdk_delay_us(1000);
|
||||||
|
poll_threads();
|
||||||
|
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
|
||||||
|
poll_threads();
|
||||||
|
|
||||||
|
ctrlr2 = ut_attach_ctrlr(&path2.trid, 1, true, true);
|
||||||
|
SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL);
|
||||||
|
|
||||||
|
ctrlr2->ns[0].uuid = &uuid1;
|
||||||
|
|
||||||
|
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE,
|
||||||
|
attach_ctrlr_done, NULL, NULL, NULL, true);
|
||||||
|
CU_ASSERT(rc == 0);
|
||||||
|
|
||||||
|
spdk_delay_us(1000);
|
||||||
|
poll_threads();
|
||||||
|
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
|
||||||
|
poll_threads();
|
||||||
|
|
||||||
|
nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0");
|
||||||
|
SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL);
|
||||||
|
|
||||||
|
nvme_ctrlr1 = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path1.trid);
|
||||||
|
SPDK_CU_ASSERT_FATAL(nvme_ctrlr1 != NULL);
|
||||||
|
|
||||||
|
nvme_ctrlr2 = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path2.trid);
|
||||||
|
SPDK_CU_ASSERT_FATAL(nvme_ctrlr2 != NULL);
|
||||||
|
|
||||||
|
bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1);
|
||||||
|
SPDK_CU_ASSERT_FATAL(bdev != NULL);
|
||||||
|
|
||||||
|
done = -1;
|
||||||
|
bdev_nvme_set_multipath_policy(bdev->disk.name, BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE,
|
||||||
|
ut_set_multipath_policy_done, &done);
|
||||||
|
poll_threads();
|
||||||
|
CU_ASSERT(done == 0);
|
||||||
|
|
||||||
|
CU_ASSERT(bdev->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE);
|
||||||
|
|
||||||
|
ch = spdk_get_io_channel(bdev);
|
||||||
|
SPDK_CU_ASSERT_FATAL(ch != NULL);
|
||||||
|
nbdev_ch = spdk_io_channel_get_ctx(ch);
|
||||||
|
|
||||||
|
CU_ASSERT(nbdev_ch->mp_policy == BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE);
|
||||||
|
|
||||||
|
bdev_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, ch);
|
||||||
|
ut_bdev_io_set_buf(bdev_io);
|
||||||
|
|
||||||
|
bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
|
||||||
|
|
||||||
|
io_path1 = ut_get_io_path_by_ctrlr(nbdev_ch, nvme_ctrlr1);
|
||||||
|
SPDK_CU_ASSERT_FATAL(io_path1 != NULL);
|
||||||
|
|
||||||
|
io_path2 = ut_get_io_path_by_ctrlr(nbdev_ch, nvme_ctrlr2);
|
||||||
|
SPDK_CU_ASSERT_FATAL(io_path2 != NULL);
|
||||||
|
|
||||||
|
/* The 1st I/O should be submitted to io_path1. */
|
||||||
|
bdev_io->internal.in_submit_request = true;
|
||||||
|
|
||||||
|
bdev_nvme_submit_request(ch, bdev_io);
|
||||||
|
CU_ASSERT(bdev_io->internal.in_submit_request == true);
|
||||||
|
CU_ASSERT(bio->io_path == io_path1);
|
||||||
|
CU_ASSERT(io_path1->qpair->qpair->num_outstanding_reqs == 1);
|
||||||
|
|
||||||
|
spdk_delay_us(1);
|
||||||
|
|
||||||
|
poll_threads();
|
||||||
|
CU_ASSERT(bdev_io->internal.in_submit_request == false);
|
||||||
|
CU_ASSERT(bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS);
|
||||||
|
|
||||||
|
/* The 2nd I/O should be submitted to io_path2 because the path selection
|
||||||
|
* policy is round-robin.
|
||||||
|
*/
|
||||||
|
bdev_io->internal.in_submit_request = true;
|
||||||
|
|
||||||
|
bdev_nvme_submit_request(ch, bdev_io);
|
||||||
|
CU_ASSERT(bdev_io->internal.in_submit_request == true);
|
||||||
|
CU_ASSERT(bio->io_path == io_path2);
|
||||||
|
CU_ASSERT(io_path2->qpair->qpair->num_outstanding_reqs == 1);
|
||||||
|
|
||||||
|
req = ut_get_outstanding_nvme_request(io_path2->qpair->qpair, bio);
|
||||||
|
SPDK_CU_ASSERT_FATAL(req != NULL);
|
||||||
|
|
||||||
|
/* Set retry count to non-zero. */
|
||||||
|
g_opts.bdev_retry_count = 1;
|
||||||
|
|
||||||
|
/* Inject an I/O error. */
|
||||||
|
req->cpl.status.sc = SPDK_NVME_SC_NAMESPACE_NOT_READY;
|
||||||
|
req->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
|
||||||
|
|
||||||
|
/* The 2nd I/O should be queued to nbdev_ch. */
|
||||||
|
spdk_delay_us(1);
|
||||||
|
poll_thread_times(0, 1);
|
||||||
|
|
||||||
|
CU_ASSERT(io_path2->qpair->qpair->num_outstanding_reqs == 0);
|
||||||
|
CU_ASSERT(bdev_io->internal.in_submit_request == true);
|
||||||
|
CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
|
||||||
|
|
||||||
|
/* The 2nd I/O should keep caching io_path2. */
|
||||||
|
CU_ASSERT(bio->io_path == io_path2);
|
||||||
|
|
||||||
|
/* The 2nd I/O should be submitted to io_path2 again. */
|
||||||
|
poll_thread_times(0, 1);
|
||||||
|
|
||||||
|
CU_ASSERT(bdev_io->internal.in_submit_request == true);
|
||||||
|
CU_ASSERT(bio->io_path == io_path2);
|
||||||
|
CU_ASSERT(io_path2->qpair->qpair->num_outstanding_reqs == 1);
|
||||||
|
|
||||||
|
spdk_delay_us(1);
|
||||||
|
poll_threads();
|
||||||
|
|
||||||
|
CU_ASSERT(io_path2->qpair->qpair->num_outstanding_reqs == 0);
|
||||||
|
CU_ASSERT(bdev_io->internal.in_submit_request == false);
|
||||||
|
CU_ASSERT(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
|
||||||
|
|
||||||
|
free(bdev_io);
|
||||||
|
|
||||||
|
spdk_put_io_channel(ch);
|
||||||
|
|
||||||
|
poll_threads();
|
||||||
|
spdk_delay_us(1);
|
||||||
|
poll_threads();
|
||||||
|
|
||||||
|
rc = bdev_nvme_delete("nvme0", &g_any_path);
|
||||||
|
CU_ASSERT(rc == 0);
|
||||||
|
|
||||||
|
poll_threads();
|
||||||
|
spdk_delay_us(1000);
|
||||||
|
poll_threads();
|
||||||
|
|
||||||
|
CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL);
|
||||||
|
|
||||||
|
g_opts.nvme_ioq_poll_period_us = 0;
|
||||||
|
g_opts.bdev_retry_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, const char **argv)
|
main(int argc, const char **argv)
|
||||||
{
|
{
|
||||||
@ -6230,6 +6405,7 @@ main(int argc, const char **argv)
|
|||||||
CU_ADD_TEST(suite, test_disable_auto_failback);
|
CU_ADD_TEST(suite, test_disable_auto_failback);
|
||||||
CU_ADD_TEST(suite, test_set_multipath_policy);
|
CU_ADD_TEST(suite, test_set_multipath_policy);
|
||||||
CU_ADD_TEST(suite, test_uuid_generation);
|
CU_ADD_TEST(suite, test_uuid_generation);
|
||||||
|
CU_ADD_TEST(suite, test_retry_io_to_same_path);
|
||||||
|
|
||||||
CU_basic_set_mode(CU_BRM_VERBOSE);
|
CU_basic_set_mode(CU_BRM_VERBOSE);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user