From 28bcf6a760ca2cfe22868573f9b7f0125ddcd8c0 Mon Sep 17 00:00:00 2001
From: Konrad Sztyber
Date: Wed, 19 Apr 2023 15:35:46 +0200
Subject: [PATCH] bdev: retry IOs on ENOMEM from pull/append_copy

The IOs will now be retried after ENOMEM is received when doing memory
domain pull or appending an accel copy.  The retries are performed using
the mechanism that's already in place for IOs completed with
SPDK_BDEV_IO_STATUS_NOMEM.

Signed-off-by: Konrad Sztyber
Change-Id: I284643bf9971338094e14617974f7511f745f24e
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17761
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins
Reviewed-by: Ben Walker
Reviewed-by: Jim Harris
---
 lib/bdev/bdev.c                     | 62 ++++++++++++++++++++++++-----
 test/unit/lib/bdev/bdev.c/bdev_ut.c | 34 +++++++++++++++-
 2 files changed, 86 insertions(+), 10 deletions(-)

diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c
index 0be796cb6..08211981b 100644
--- a/lib/bdev/bdev.c
+++ b/lib/bdev/bdev.c
@@ -368,6 +368,8 @@ struct spdk_bdev_io_error_stat {
 
 enum bdev_io_retry_state {
 	BDEV_IO_RETRY_STATE_INVALID,
+	BDEV_IO_RETRY_STATE_PULL,
+	BDEV_IO_RETRY_STATE_PULL_MD,
 	BDEV_IO_RETRY_STATE_SUBMIT,
 };
 
@@ -414,6 +416,8 @@ static bool claim_type_is_v2(enum spdk_bdev_claim_type type);
 static void bdev_desc_release_claims(struct spdk_bdev_desc *desc);
 static void claim_reset(struct spdk_bdev *bdev);
 
+static void bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch);
+
 #define bdev_get_ext_io_opt(opts, field, defval) \
 	(((opts) != NULL && offsetof(struct spdk_bdev_ext_io_opts, field) + \
 	  sizeof((opts)->field) <= sizeof(*(opts))) ? (opts)->field : (defval))
@@ -1061,6 +1065,11 @@ bdev_io_exec_sequence_cb(void *ctx, int status)
 
 	TAILQ_REMOVE(&bdev_io->internal.ch->io_accel_exec, bdev_io, internal.link);
 	bdev_io_decrement_outstanding(ch, ch->shared_resource);
+
+	if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
+		bdev_ch_retry_io(ch);
+	}
+
 	bdev_io->internal.data_transfer_cpl(bdev_io, status);
 }
 
@@ -1126,6 +1135,11 @@ bdev_io_pull_md_buf_done(void *ctx, int status)
 
 	TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
 	bdev_io_decrement_outstanding(ch, ch->shared_resource);
+
+	if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
+		bdev_ch_retry_io(ch);
+	}
+
 	assert(bdev_io->internal.data_transfer_cpl);
 	bdev_io->internal.data_transfer_cpl(bdev_io, status);
 }
@@ -1151,8 +1165,11 @@ bdev_io_pull_md_buf(struct spdk_bdev_io *bdev_io)
 			}
 			bdev_io_decrement_outstanding(ch, ch->shared_resource);
 			TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
-			SPDK_ERRLOG("Failed to pull data from memory domain %s, rc %d\n",
-				    spdk_memory_domain_get_dma_device_id(bdev_io->internal.memory_domain), rc);
+			if (rc != -ENOMEM) {
+				SPDK_ERRLOG("Failed to pull data from memory domain %s, rc %d\n",
+					    spdk_memory_domain_get_dma_device_id(
+						    bdev_io->internal.memory_domain), rc);
+			}
 		} else {
 			memcpy(bdev_io->internal.bounce_md_iov.iov_base,
 			       bdev_io->internal.orig_md_iov.iov_base,
@@ -1160,8 +1177,12 @@
 		}
 	}
 
-	assert(bdev_io->internal.data_transfer_cpl);
-	bdev_io->internal.data_transfer_cpl(bdev_io, rc);
+	if (spdk_unlikely(rc == -ENOMEM)) {
+		bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL_MD);
+	} else {
+		assert(bdev_io->internal.data_transfer_cpl);
+		bdev_io->internal.data_transfer_cpl(bdev_io, rc);
+	}
 }
 
 static void
@@ -1228,6 +1249,10 @@ _bdev_io_pull_bounce_data_buf_done(void *ctx, int status)
 	TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
 	bdev_io_decrement_outstanding(ch, ch->shared_resource);
 
+	if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
+		bdev_ch_retry_io(ch);
+	}
+
 	bdev_io_pull_bounce_data_buf_done(ctx, status);
 }
 
@@ -1261,7 +1286,7 @@ bdev_io_pull_data(struct spdk_bdev_io *bdev_io)
 					    NULL, NULL, 0, NULL, NULL);
 	}
 
-	if (spdk_unlikely(rc != 0)) {
+	if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
 		SPDK_ERRLOG("Failed to append copy to accel sequence: %p\n",
 			    bdev_io->internal.accel_sequence);
 	}
@@ -1283,8 +1308,11 @@
 			}
 			TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
 			bdev_io_decrement_outstanding(ch, ch->shared_resource);
-			SPDK_ERRLOG("Failed to pull data from memory domain %s\n",
-				    spdk_memory_domain_get_dma_device_id(bdev_io->internal.memory_domain));
+			if (rc != -ENOMEM) {
+				SPDK_ERRLOG("Failed to pull data from memory domain %s\n",
+					    spdk_memory_domain_get_dma_device_id(
+						    bdev_io->internal.memory_domain));
+			}
 		} else {
 			assert(bdev_io->u.bdev.iovcnt == 1);
 			spdk_copy_iovs_to_buf(bdev_io->u.bdev.iovs[0].iov_base,
@@ -1294,13 +1322,19 @@
 		}
 	}
 
-	bdev_io_pull_bounce_data_buf_done(bdev_io, rc);
+	if (spdk_unlikely(rc == -ENOMEM)) {
+		bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL);
+	} else {
+		bdev_io_pull_bounce_data_buf_done(bdev_io, rc);
+	}
 }
 
 static void
 _bdev_io_pull_bounce_data_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len,
 			      bdev_copy_bounce_buffer_cpl cpl_cb)
 {
+	struct spdk_bdev_shared_resource *shared_resource = bdev_io->internal.ch->shared_resource;
+
 	bdev_io->internal.data_transfer_cpl = cpl_cb;
 	/* save original iovec */
 	bdev_io->internal.orig_iovs = bdev_io->u.bdev.iovs;
@@ -1312,7 +1346,11 @@ _bdev_io_pull_bounce_data_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t le
 	bdev_io->u.bdev.iovs[0].iov_base = buf;
 	bdev_io->u.bdev.iovs[0].iov_len = len;
 
-	bdev_io_pull_data(bdev_io);
+	if (spdk_unlikely(!TAILQ_EMPTY(&shared_resource->nomem_io))) {
+		bdev_queue_nomem_io_tail(shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL);
+	} else {
+		bdev_io_pull_data(bdev_io);
+	}
 }
 
 static void
@@ -1438,6 +1476,12 @@ bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch)
 		case BDEV_IO_RETRY_STATE_SUBMIT:
 			bdev_ch_resubmit_io(bdev_ch, bdev_io);
 			break;
+		case BDEV_IO_RETRY_STATE_PULL:
+			bdev_io_pull_data(bdev_io);
+			break;
+		case BDEV_IO_RETRY_STATE_PULL_MD:
+			bdev_io_pull_md_buf(bdev_io);
+			break;
 		default:
 			assert(0 && "invalid retry state");
 			break;
diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c
index 7291f1273..56f1af2c8 100644
--- a/test/unit/lib/bdev/bdev.c/bdev_ut.c
+++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c
@@ -5795,7 +5795,7 @@ bdev_io_ext_bounce_buffer(void)
 	struct spdk_io_channel *io_ch;
 	char io_buf[512];
 	struct iovec iov = { .iov_base = io_buf, .iov_len = 512 };
-	struct ut_expected_io *expected_io;
+	struct ut_expected_io *expected_io, *aux_io;
 	struct spdk_bdev_ext_io_opts ext_io_opts = {
 		.metadata = (void *)0xFF000000,
 		.size = sizeof(ext_io_opts)
@@ -5849,6 +5849,38 @@
 	stub_complete_io(1);
 	CU_ASSERT(g_io_done == true);
 
+	/* Verify the request is queued after receiving ENOMEM from pull */
+	g_io_done = false;
+	aux_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1);
+	ut_expected_io_set_iov(aux_io, 0, iov.iov_base, iov.iov_len);
+	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, aux_io, link);
+	rc = spdk_bdev_writev_blocks(desc, io_ch, &iov, 1, 32, 14, io_done, NULL);
+	CU_ASSERT(rc == 0);
+	CU_ASSERT(g_io_done == false);
+	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
+
+	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1);
+	ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
+	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
+
+	MOCK_SET(spdk_memory_domain_pull_data, -ENOMEM);
+	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
+	CU_ASSERT(rc == 0);
+	CU_ASSERT(g_io_done == false);
+	/* The second IO has been queued */
+	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
+
+	MOCK_CLEAR(spdk_memory_domain_pull_data);
+	g_memory_domain_pull_data_called = false;
+	stub_complete_io(1);
+	CU_ASSERT(g_io_done == true);
+	CU_ASSERT(g_memory_domain_pull_data_called == true);
+	/* The second IO should be submitted now */
+	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
+	g_io_done = false;
+	stub_complete_io(1);
+	CU_ASSERT(g_io_done == true);
+
 	spdk_put_io_channel(io_ch);
 	spdk_bdev_close(desc);
 	free_bdev(bdev);