diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 0be796cb6..08211981b 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -368,6 +368,8 @@ struct spdk_bdev_io_error_stat { enum bdev_io_retry_state { BDEV_IO_RETRY_STATE_INVALID, + BDEV_IO_RETRY_STATE_PULL, + BDEV_IO_RETRY_STATE_PULL_MD, BDEV_IO_RETRY_STATE_SUBMIT, }; @@ -414,6 +416,8 @@ static bool claim_type_is_v2(enum spdk_bdev_claim_type type); static void bdev_desc_release_claims(struct spdk_bdev_desc *desc); static void claim_reset(struct spdk_bdev *bdev); +static void bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch); + #define bdev_get_ext_io_opt(opts, field, defval) \ (((opts) != NULL && offsetof(struct spdk_bdev_ext_io_opts, field) + \ sizeof((opts)->field) <= sizeof(*(opts))) ? (opts)->field : (defval)) @@ -1061,6 +1065,11 @@ bdev_io_exec_sequence_cb(void *ctx, int status) TAILQ_REMOVE(&bdev_io->internal.ch->io_accel_exec, bdev_io, internal.link); bdev_io_decrement_outstanding(ch, ch->shared_resource); + + if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) { + bdev_ch_retry_io(ch); + } + bdev_io->internal.data_transfer_cpl(bdev_io, status); } @@ -1126,6 +1135,11 @@ bdev_io_pull_md_buf_done(void *ctx, int status) TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link); bdev_io_decrement_outstanding(ch, ch->shared_resource); + + if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) { + bdev_ch_retry_io(ch); + } + assert(bdev_io->internal.data_transfer_cpl); bdev_io->internal.data_transfer_cpl(bdev_io, status); } @@ -1151,8 +1165,11 @@ bdev_io_pull_md_buf(struct spdk_bdev_io *bdev_io) } bdev_io_decrement_outstanding(ch, ch->shared_resource); TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link); - SPDK_ERRLOG("Failed to pull data from memory domain %s, rc %d\n", - spdk_memory_domain_get_dma_device_id(bdev_io->internal.memory_domain), rc); + if (rc != -ENOMEM) { + SPDK_ERRLOG("Failed to pull data from memory domain %s, rc %d\n", + spdk_memory_domain_get_dma_device_id( + bdev_io->internal.memory_domain), rc); + } } else { memcpy(bdev_io->internal.bounce_md_iov.iov_base, bdev_io->internal.orig_md_iov.iov_base, @@ -1160,8 +1177,12 @@ bdev_io_pull_md_buf(struct spdk_bdev_io *bdev_io) } } - assert(bdev_io->internal.data_transfer_cpl); - bdev_io->internal.data_transfer_cpl(bdev_io, rc); + if (spdk_unlikely(rc == -ENOMEM)) { + bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL_MD); + } else { + assert(bdev_io->internal.data_transfer_cpl); + bdev_io->internal.data_transfer_cpl(bdev_io, rc); + } } static void @@ -1228,6 +1249,10 @@ _bdev_io_pull_bounce_data_buf_done(void *ctx, int status) TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link); bdev_io_decrement_outstanding(ch, ch->shared_resource); + if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) { + bdev_ch_retry_io(ch); + } + bdev_io_pull_bounce_data_buf_done(ctx, status); } @@ -1261,7 +1286,7 @@ bdev_io_pull_data(struct spdk_bdev_io *bdev_io) NULL, NULL, 0, NULL, NULL); } - if (spdk_unlikely(rc != 0)) { + if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) { SPDK_ERRLOG("Failed to append copy to accel sequence: %p\n", bdev_io->internal.accel_sequence); } @@ -1283,8 +1308,11 @@ bdev_io_pull_data(struct spdk_bdev_io *bdev_io) } TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link); bdev_io_decrement_outstanding(ch, ch->shared_resource); - SPDK_ERRLOG("Failed to pull data from memory domain %s\n", - spdk_memory_domain_get_dma_device_id(bdev_io->internal.memory_domain)); + if (rc != -ENOMEM) { + SPDK_ERRLOG("Failed to pull data from memory domain %s\n", + spdk_memory_domain_get_dma_device_id( + bdev_io->internal.memory_domain)); + } } else { assert(bdev_io->u.bdev.iovcnt == 1); spdk_copy_iovs_to_buf(bdev_io->u.bdev.iovs[0].iov_base, @@ -1294,13 +1322,19 @@ bdev_io_pull_data(struct spdk_bdev_io *bdev_io) } } - bdev_io_pull_bounce_data_buf_done(bdev_io, rc); + if (spdk_unlikely(rc == -ENOMEM)) { + bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL); + } else { + bdev_io_pull_bounce_data_buf_done(bdev_io, rc); + } } static void _bdev_io_pull_bounce_data_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len, bdev_copy_bounce_buffer_cpl cpl_cb) { + struct spdk_bdev_shared_resource *shared_resource = bdev_io->internal.ch->shared_resource; + bdev_io->internal.data_transfer_cpl = cpl_cb; /* save original iovec */ bdev_io->internal.orig_iovs = bdev_io->u.bdev.iovs; @@ -1312,7 +1346,11 @@ _bdev_io_pull_bounce_data_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t le bdev_io->u.bdev.iovs[0].iov_base = buf; bdev_io->u.bdev.iovs[0].iov_len = len; - bdev_io_pull_data(bdev_io); + if (spdk_unlikely(!TAILQ_EMPTY(&shared_resource->nomem_io))) { + bdev_queue_nomem_io_tail(shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL); + } else { + bdev_io_pull_data(bdev_io); + } } static void @@ -1438,6 +1476,12 @@ bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch) case BDEV_IO_RETRY_STATE_SUBMIT: bdev_ch_resubmit_io(bdev_ch, bdev_io); break; + case BDEV_IO_RETRY_STATE_PULL: + bdev_io_pull_data(bdev_io); + break; + case BDEV_IO_RETRY_STATE_PULL_MD: + bdev_io_pull_md_buf(bdev_io); + break; default: assert(0 && "invalid retry state"); break; diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index 7291f1273..56f1af2c8 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -5795,7 +5795,7 @@ bdev_io_ext_bounce_buffer(void) struct spdk_io_channel *io_ch; char io_buf[512]; struct iovec iov = { .iov_base = io_buf, .iov_len = 512 }; - struct ut_expected_io *expected_io; + struct ut_expected_io *expected_io, *aux_io; struct spdk_bdev_ext_io_opts ext_io_opts = { .metadata = (void *)0xFF000000, .size = sizeof(ext_io_opts) @@ -5849,6 +5849,38 @@ bdev_io_ext_bounce_buffer(void) stub_complete_io(1); CU_ASSERT(g_io_done == true); + /* Verify the request is queued after receiving ENOMEM from pull */ + g_io_done = false; + aux_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1); + ut_expected_io_set_iov(aux_io, 0, iov.iov_base, iov.iov_len); + TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, aux_io, link); + rc = spdk_bdev_writev_blocks(desc, io_ch, &iov, 1, 32, 14, io_done, NULL); + CU_ASSERT(rc == 0); + CU_ASSERT(g_io_done == false); + CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); + + expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1); + ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len); + TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link); + + MOCK_SET(spdk_memory_domain_pull_data, -ENOMEM); + rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts); + CU_ASSERT(rc == 0); + CU_ASSERT(g_io_done == false); + /* The second IO has been queued */ + CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); + + MOCK_CLEAR(spdk_memory_domain_pull_data); + g_memory_domain_pull_data_called = false; + stub_complete_io(1); + CU_ASSERT(g_io_done == true); + CU_ASSERT(g_memory_domain_pull_data_called == true); + /* The second IO should be submitted now */ + CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1); + g_io_done = false; + stub_complete_io(1); + CU_ASSERT(g_io_done == true); + spdk_put_io_channel(io_ch); spdk_bdev_close(desc); free_bdev(bdev);