bdev/nvme: Fix race between for_each_channel() and io_device_unregister()

If an nvme_ctrlr is unregistered while its I/O path caches are being
cleared, the unregistration fails. This race was not handled for the
case where an nvme_ctrlr is unregistered after the adminq poller has
started clearing the I/O path caches.

As a bug fix, control ANA log page updates and I/O path cache clearing
separately by adding a new flag, io_path_cache_clearing.

Fixes issue #2617

Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: Id58684caf9b2a10fa68bdb717288ff2bd799c3f7
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13924
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Dong Yi <dongx.yi@intel.com>
Shuhei Matsumoto authored 2022-08-09 10:46:48 +09:00; committed by Tomasz Zawadzki
parent d0cf194bc4
commit a4ebc5714f
2 changed files with 35 additions and 8 deletions
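The pattern the patch applies generalizes beyond SPDK, so a minimal, self-contained sketch may help: a guard flag toggled under a mutex makes the clearing pass mutually exclusive with unregistration, and the pass's completion retries the deferred unregistration. All types and function names below are illustrative stand-ins rather than SPDK's API; only the flag name io_path_cache_clearing is taken from the patch.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for struct nvme_ctrlr; only the fields that
 * matter for the race are kept. */
struct ctrlr {
	pthread_mutex_t mutex;
	bool destruct;               /* unregistration has been requested */
	bool io_path_cache_clearing; /* the flag this patch introduces */
};

/* Caller must hold c->mutex. Unregistration must wait while a
 * cache-clearing pass is in flight. */
static bool
can_be_unregistered(struct ctrlr *c)
{
	return c->destruct && !c->io_path_cache_clearing;
}

/* Completion of a clearing pass: drop the flag, then perform the
 * unregistration if it was deferred while the pass ran. */
static void
clear_io_path_caches_done(struct ctrlr *c)
{
	pthread_mutex_lock(&c->mutex);
	assert(c->io_path_cache_clearing);
	c->io_path_cache_clearing = false;

	if (can_be_unregistered(c)) {
		pthread_mutex_unlock(&c->mutex);
		printf("unregistering controller\n");
		return;
	}
	pthread_mutex_unlock(&c->mutex);
}

/* Start a clearing pass; a pass already in flight makes this a no-op,
 * mirroring the guard added to bdev_nvme_clear_io_path_caches(). */
static void
clear_io_path_caches(struct ctrlr *c)
{
	pthread_mutex_lock(&c->mutex);
	if (c->io_path_cache_clearing) {
		pthread_mutex_unlock(&c->mutex);
		return;
	}
	c->io_path_cache_clearing = true;
	pthread_mutex_unlock(&c->mutex);

	/* Per-channel work would run here; SPDK does it asynchronously
	 * via spdk_for_each_channel() and then invokes the completion. */
	clear_io_path_caches_done(c);
}

int
main(void)
{
	struct ctrlr c = {
		.mutex = PTHREAD_MUTEX_INITIALIZER,
		.destruct = true, /* unregistration already requested */
	};

	clear_io_path_caches(&c); /* the completion retries and succeeds */
	return 0;
}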

module/bdev/nvme/bdev_nvme.c

@@ -517,6 +517,10 @@ nvme_ctrlr_can_be_unregistered(struct nvme_ctrlr *nvme_ctrlr)
 		return false;
 	}
 
+	if (nvme_ctrlr->io_path_cache_clearing) {
+		return false;
+	}
+
 	return true;
 }
@@ -1131,14 +1135,14 @@ bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc)
 }
 
 static void
-_nvme_ctrlr_read_ana_log_page_done(struct spdk_io_channel_iter *i, int status)
+bdev_nvme_clear_io_path_caches_done(struct spdk_io_channel_iter *i, int status)
 {
 	struct nvme_ctrlr *nvme_ctrlr = spdk_io_channel_iter_get_io_device(i);
 
 	pthread_mutex_lock(&nvme_ctrlr->mutex);
 
-	assert(nvme_ctrlr->ana_log_page_updating == true);
-	nvme_ctrlr->ana_log_page_updating = false;
+	assert(nvme_ctrlr->io_path_cache_clearing == true);
+	nvme_ctrlr->io_path_cache_clearing = false;
 
 	if (!nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
 		pthread_mutex_unlock(&nvme_ctrlr->mutex);
@@ -1174,13 +1178,22 @@ bdev_nvme_clear_io_path_cache(struct spdk_io_channel_iter *i)
 }
 
 static void
-bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr,
-			       spdk_channel_for_each_cpl cpl)
+bdev_nvme_clear_io_path_caches(struct nvme_ctrlr *nvme_ctrlr)
 {
+	pthread_mutex_lock(&nvme_ctrlr->mutex);
+	if (!nvme_ctrlr_is_available(nvme_ctrlr) ||
+	    nvme_ctrlr->io_path_cache_clearing) {
+		pthread_mutex_unlock(&nvme_ctrlr->mutex);
+		return;
+	}
+
+	nvme_ctrlr->io_path_cache_clearing = true;
+	pthread_mutex_unlock(&nvme_ctrlr->mutex);
+
 	spdk_for_each_channel(nvme_ctrlr,
 			      bdev_nvme_clear_io_path_cache,
 			      NULL,
-			      cpl);
+			      bdev_nvme_clear_io_path_caches_done);
 }
 
 static struct nvme_qpair *
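Note the design choice in this hunk: bdev_nvme_clear_io_path_caches() no longer takes a caller-supplied completion. With the callback fixed to bdev_nvme_clear_io_path_caches_done(), every clearing pass is guaranteed to drop io_path_cache_clearing and re-check whether the controller can be unregistered, no matter which path started the pass.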
@@ -1323,7 +1336,7 @@ bdev_nvme_poll_adminq(void *arg)
 		}
 	} else if (spdk_nvme_ctrlr_get_admin_qp_failure_reason(nvme_ctrlr->ctrlr) !=
 		   SPDK_NVME_QPAIR_FAILURE_NONE) {
-		bdev_nvme_clear_io_path_caches(nvme_ctrlr, NULL);
+		bdev_nvme_clear_io_path_caches(nvme_ctrlr);
 	}
 
 	return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
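Because the guard now lives inside bdev_nvme_clear_io_path_caches() itself, the adminq poller can call it unconditionally whenever the admin queue reports a failure; a call made while a pass is already in flight simply returns without starting a second one.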
@@ -3551,7 +3564,20 @@ nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
 		bdev_nvme_disable_read_ana_log_page(nvme_ctrlr);
 	}
 
-	bdev_nvme_clear_io_path_caches(nvme_ctrlr, _nvme_ctrlr_read_ana_log_page_done);
+	pthread_mutex_lock(&nvme_ctrlr->mutex);
+
+	assert(nvme_ctrlr->ana_log_page_updating == true);
+	nvme_ctrlr->ana_log_page_updating = false;
+
+	if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
+		pthread_mutex_unlock(&nvme_ctrlr->mutex);
+
+		nvme_ctrlr_unregister(nvme_ctrlr);
+	} else {
+		pthread_mutex_unlock(&nvme_ctrlr->mutex);
+
+		bdev_nvme_clear_io_path_caches(nvme_ctrlr);
+	}
 }
 
 static int
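The ANA completion path above now manages its own flag: it clears ana_log_page_updating and then either finishes the deferred unregistration directly or starts a cache-clearing pass whose completion will retry it. Unregistration is thus serialized against the two flags independently, which is what closes the race with the adminq poller.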

module/bdev/nvme/bdev_nvme.h

@@ -99,6 +99,7 @@ struct nvme_ctrlr {
 	uint32_t			fast_io_fail_timedout : 1;
 	uint32_t			destruct : 1;
 	uint32_t			ana_log_page_updating : 1;
+	uint32_t			io_path_cache_clearing : 1;
 
 	struct nvme_ctrlr_opts		opts;