bdev: properly handle aborted resets
Previously, we naively assumed that a completed reset was the reset in progress, and would unilaterally set reset_in_progres to false. So change reset_in_progress to a bdev_io pointer instead. If this is not NULL, a reset is not in progress. Then when a reset completes, we only set the reset_in_progress pointer to NULL if we are completing the reset that is in progress. We also were not aborting queued resets when destroying a channel so that is fixed here too. The added unit test covers both fixes above - it will submit two resets on a different channels, then destroy the second channel. This will abort the second reset and check that the bdev still sees the first reset as in progress. Signed-off-by: Jim Harris <james.r.harris@intel.com> Change-Id: I61df677cfa272c589ca03cb81753f71b0807a182 Reviewed-on: https://review.gerrithub.io/378199 Tested-by: SPDK Automated Test System <sys_sgsw@intel.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-by: Dariusz Stojaczyk <dariuszx.stojaczyk@intel.com> Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
This commit is contained in:
parent
ab29d2ce5d
commit
1f935c7a9b
@ -235,8 +235,8 @@ struct spdk_bdev {
|
||||
|
||||
TAILQ_ENTRY(spdk_bdev) link;
|
||||
|
||||
/** denotes if a reset is currently in progress on this bdev */
|
||||
bool reset_in_progress;
|
||||
/** points to a reset bdev_io if one is in progress. */
|
||||
struct spdk_bdev_io *reset_in_progress;
|
||||
};
|
||||
|
||||
typedef void (*spdk_bdev_io_get_buf_cb)(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io);
|
||||
|
@ -690,8 +690,12 @@ spdk_bdev_channel_create(void *io_device, void *ctx_buf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Abort I/O that are waiting on a data buffer. These types of I/O are
|
||||
* linked using the spdk_bdev_io buf_link TAILQ_ENTRY.
|
||||
*/
|
||||
static void
|
||||
_spdk_bdev_abort_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
|
||||
_spdk_bdev_abort_buf_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
|
||||
{
|
||||
struct spdk_bdev_io *bdev_io, *tmp;
|
||||
|
||||
@ -703,6 +707,23 @@ _spdk_bdev_abort_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Abort I/O that are queued waiting for submission. These types of I/O are
|
||||
* linked using the spdk_bdev_io link TAILQ_ENTRY.
|
||||
*/
|
||||
static void
|
||||
_spdk_bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
|
||||
{
|
||||
struct spdk_bdev_io *bdev_io, *tmp;
|
||||
|
||||
TAILQ_FOREACH_SAFE(bdev_io, queue, link, tmp) {
|
||||
if (bdev_io->ch == ch) {
|
||||
TAILQ_REMOVE(queue, bdev_io, link);
|
||||
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
|
||||
{
|
||||
@ -711,8 +732,9 @@ spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
|
||||
|
||||
mgmt_channel = spdk_io_channel_get_ctx(ch->mgmt_channel);
|
||||
|
||||
_spdk_bdev_abort_io(&mgmt_channel->need_buf_small, ch);
|
||||
_spdk_bdev_abort_io(&mgmt_channel->need_buf_large, ch);
|
||||
_spdk_bdev_abort_queued_io(&ch->queued_resets, ch);
|
||||
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, ch);
|
||||
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, ch);
|
||||
|
||||
spdk_put_io_channel(ch->channel);
|
||||
spdk_put_io_channel(ch->mgmt_channel);
|
||||
@ -1165,8 +1187,8 @@ _spdk_bdev_reset_abort_channel(void *io_device, struct spdk_io_channel *ch,
|
||||
channel = spdk_io_channel_get_ctx(ch);
|
||||
mgmt_channel = spdk_io_channel_get_ctx(channel->mgmt_channel);
|
||||
|
||||
_spdk_bdev_abort_io(&mgmt_channel->need_buf_small, channel);
|
||||
_spdk_bdev_abort_io(&mgmt_channel->need_buf_large, channel);
|
||||
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, channel);
|
||||
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, channel);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1186,8 +1208,8 @@ _spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch)
|
||||
assert(!TAILQ_EMPTY(&ch->queued_resets));
|
||||
|
||||
pthread_mutex_lock(&bdev->mutex);
|
||||
if (!bdev->reset_in_progress) {
|
||||
bdev->reset_in_progress = true;
|
||||
if (bdev->reset_in_progress == NULL) {
|
||||
bdev->reset_in_progress = TAILQ_FIRST(&ch->queued_resets);
|
||||
/*
|
||||
* Take a channel reference for the target bdev for the life of this
|
||||
* reset. This guards against the channel getting destroyed while
|
||||
@ -1195,7 +1217,7 @@ _spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch)
|
||||
* progress. We will release the reference when this reset is
|
||||
* completed.
|
||||
*/
|
||||
TAILQ_FIRST(&ch->queued_resets)->u.reset.ch_ref = spdk_get_io_channel(bdev);
|
||||
bdev->reset_in_progress->u.reset.ch_ref = spdk_get_io_channel(bdev);
|
||||
_spdk_bdev_start_reset(ch);
|
||||
}
|
||||
pthread_mutex_unlock(&bdev->mutex);
|
||||
@ -1357,7 +1379,9 @@ spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status sta
|
||||
|
||||
if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_RESET)) {
|
||||
pthread_mutex_lock(&bdev_io->bdev->mutex);
|
||||
bdev_io->bdev->reset_in_progress = false;
|
||||
if (bdev_io == bdev_io->bdev->reset_in_progress) {
|
||||
bdev_io->bdev->reset_in_progress = NULL;
|
||||
}
|
||||
pthread_mutex_unlock(&bdev_io->bdev->mutex);
|
||||
if (bdev_io->u.reset.ch_ref != NULL) {
|
||||
spdk_put_io_channel(bdev_io->u.reset.ch_ref);
|
||||
@ -1513,7 +1537,7 @@ _spdk_bdev_register(struct spdk_bdev *bdev)
|
||||
TAILQ_INIT(&bdev->vbdevs);
|
||||
TAILQ_INIT(&bdev->base_bdevs);
|
||||
|
||||
bdev->reset_in_progress = false;
|
||||
bdev->reset_in_progress = NULL;
|
||||
|
||||
spdk_io_device_register(bdev, spdk_bdev_channel_create, spdk_bdev_channel_destroy,
|
||||
sizeof(struct spdk_bdev_channel));
|
||||
|
@ -225,6 +225,70 @@ put_channel_during_reset(void)
|
||||
teardown_test();
|
||||
}
|
||||
|
||||
static void
|
||||
aborted_reset_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
|
||||
{
|
||||
enum spdk_bdev_io_status *status = cb_arg;
|
||||
|
||||
*status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
|
||||
spdk_bdev_free_io(bdev_io);
|
||||
}
|
||||
|
||||
static void
|
||||
aborted_reset(void)
|
||||
{
|
||||
struct spdk_io_channel *io_ch[2];
|
||||
enum spdk_bdev_io_status status1, status2;
|
||||
|
||||
setup_test();
|
||||
|
||||
set_thread(0);
|
||||
io_ch[0] = spdk_bdev_get_io_channel(g_desc);
|
||||
CU_ASSERT(io_ch[0] != NULL);
|
||||
spdk_bdev_reset(g_desc, io_ch[0], aborted_reset_done, &status1);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bdev.bdev.reset_in_progress != NULL);
|
||||
|
||||
/*
|
||||
* First reset has been submitted on ch0. Now submit a second
|
||||
* reset on ch1 which will get queued since there is already a
|
||||
* reset in progress.
|
||||
*/
|
||||
set_thread(1);
|
||||
io_ch[1] = spdk_bdev_get_io_channel(g_desc);
|
||||
CU_ASSERT(io_ch[1] != NULL);
|
||||
spdk_bdev_reset(g_desc, io_ch[1], aborted_reset_done, &status2);
|
||||
poll_threads();
|
||||
CU_ASSERT(g_bdev.bdev.reset_in_progress != NULL);
|
||||
|
||||
/*
|
||||
* Now destroy ch1. This will abort the queued reset. Check that
|
||||
* the second reset was completed with failed status. Also check
|
||||
* that bdev->reset_in_progress != NULL, since the original reset
|
||||
* has not been completed yet. This ensures that the bdev code is
|
||||
* correctly noticing that the failed reset is *not* the one that
|
||||
* had been submitted to the bdev module.
|
||||
*/
|
||||
set_thread(1);
|
||||
spdk_put_io_channel(io_ch[1]);
|
||||
poll_threads();
|
||||
CU_ASSERT(status2 == SPDK_BDEV_IO_STATUS_FAILED);
|
||||
CU_ASSERT(g_bdev.bdev.reset_in_progress != NULL);
|
||||
|
||||
/*
|
||||
* Now complete the first reset, verify that it completed with SUCCESS
|
||||
* status and that bdev->reset_in_progress is also set back to NULL.
|
||||
*/
|
||||
set_thread(0);
|
||||
spdk_put_io_channel(io_ch[0]);
|
||||
stub_complete_io();
|
||||
poll_threads();
|
||||
CU_ASSERT(status1 == SPDK_BDEV_IO_STATUS_SUCCESS);
|
||||
CU_ASSERT(g_bdev.bdev.reset_in_progress == NULL);
|
||||
|
||||
teardown_test();
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
@ -243,7 +307,8 @@ main(int argc, char **argv)
|
||||
|
||||
if (
|
||||
CU_add_test(suite, "basic", basic) == NULL ||
|
||||
CU_add_test(suite, "put_channel_during_reset", put_channel_during_reset) == NULL
|
||||
CU_add_test(suite, "put_channel_during_reset", put_channel_during_reset) == NULL ||
|
||||
CU_add_test(suite, "aborted_reset", aborted_reset) == NULL
|
||||
) {
|
||||
CU_cleanup_registry();
|
||||
return CU_get_error();
|
||||
|
Loading…
Reference in New Issue
Block a user