From 2bc134eb4bfe9aaa378a9944aa7cb89ef1e4d6a4 Mon Sep 17 00:00:00 2001 From: Alex Michon Date: Wed, 6 Apr 2022 14:43:21 +0200 Subject: [PATCH] bdev/nvme: Fix aborting fuse commands When sending a fused compare and write command, we pass a callback bdev_nvme_comparev_and_writev_done that we expect to be called twice before marking the io as completed. In order to detect if a call to bdev_nvme_comparev_and_writev_done is the first or the second one, we currently rely on the opcode in cdw0. However, cdw0 may be set to 0, especially when aborting the command. This may cause use-after-free issues and this may call the user callbacks twice instead of once. Use a bit in the nvme_bdev_io instead to keep track of the number of calls to bdev_nvme_comparev_and_writev_done. Signed-off-by: Alex Michon Change-Id: I0474329e87648e44b08998d0552b2a9dd5d34ac2 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12180 Community-CI: Broadcom CI Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Konrad Sztyber Reviewed-by: Jim Harris --- module/bdev/nvme/bdev_nvme.c | 7 ++++- .../lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c | 29 ++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index 66afd5395..aabf6c3c0 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -102,6 +102,9 @@ struct nvme_bdev_io { /** Keeps track if first of fused commands was submitted */ bool first_fused_submitted; + /** Keeps track if first of fused commands was completed */ + bool first_fused_completed; + /** Temporary pointer to zone report buffer */ struct spdk_nvme_zns_zone_report *zone_report_buf; @@ -5153,9 +5156,10 @@ bdev_nvme_comparev_and_writev_done(void *ref, const struct spdk_nvme_cpl *cpl) struct nvme_bdev_io *bio = ref; /* Compare operation completion */ - if ((cpl->cdw0 & 0xFF) == SPDK_NVME_OPC_COMPARE) { + if (!bio->first_fused_completed) { /* Save compare result for write callback */ bio->cpl = *cpl; + bio->first_fused_completed = true; return; } @@ -5688,6 +5692,7 @@ bdev_nvme_comparev_and_writev(struct nvme_bdev_io *bio, struct iovec *cmp_iov, i if (bdev_io->num_retries == 0) { bio->first_fused_submitted = false; + bio->first_fused_completed = false; } if (!bio->first_fused_submitted) { diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index 54c21f351..2e5db8221 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -2480,7 +2480,7 @@ test_abort(void) const int STRING_SIZE = 32; const char *attached_names[STRING_SIZE]; struct nvme_bdev *bdev; - struct spdk_bdev_io *write_io, *admin_io, *abort_io; + struct spdk_bdev_io *write_io, *fuse_io, *admin_io, *abort_io; struct spdk_io_channel *ch1, *ch2; struct nvme_bdev_channel *nbdev_ch1; struct nvme_io_path *io_path1; @@ -2521,6 +2521,9 @@ test_abort(void) write_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, NULL); ut_bdev_io_set_buf(write_io); + fuse_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE, bdev, NULL); + ut_bdev_io_set_buf(fuse_io); + admin_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_NVME_ADMIN, bdev, NULL); admin_io->u.nvme_passthru.cmd.opc = SPDK_NVME_OPC_GET_FEATURES; @@ -2542,6 +2545,7 @@ test_abort(void) SPDK_CU_ASSERT_FATAL(ch2 != NULL); write_io->internal.ch = (struct spdk_bdev_channel *)ch1; + fuse_io->internal.ch = (struct spdk_bdev_channel *)ch1; abort_io->internal.ch = (struct spdk_bdev_channel *)ch1; /* Aborting the already completed request should fail. */ @@ -2606,6 +2610,28 @@ test_abort(void) CU_ASSERT(write_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED); CU_ASSERT(nvme_qpair1->qpair->num_outstanding_reqs == 0); + /* Aborting the fuse request should succeed. */ + fuse_io->internal.in_submit_request = true; + bdev_nvme_submit_request(ch1, fuse_io); + + CU_ASSERT(fuse_io->internal.in_submit_request == true); + CU_ASSERT(nvme_qpair1->qpair->num_outstanding_reqs == 2); + + abort_io->u.abort.bio_to_abort = fuse_io; + abort_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch1, abort_io); + + spdk_delay_us(10000); + poll_threads(); + + CU_ASSERT(abort_io->internal.in_submit_request == false); + CU_ASSERT(abort_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0); + CU_ASSERT(fuse_io->internal.in_submit_request == false); + CU_ASSERT(fuse_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED); + CU_ASSERT(nvme_qpair1->qpair->num_outstanding_reqs == 0); + /* Aborting the admin request should succeed. */ admin_io->internal.in_submit_request = true; bdev_nvme_submit_request(ch1, admin_io); @@ -2675,6 +2701,7 @@ test_abort(void) poll_threads(); free(write_io); + free(fuse_io); free(admin_io); free(abort_io);