bdev: properly handle aborted resets

Previously, we naively assumed that a completed
reset was the reset in progress, and would
unilaterally set reset_in_progres to false.

So change reset_in_progress to a bdev_io pointer
instead.  If this is not NULL, a reset is not in
progress.  Then when a reset completes, we only
set the reset_in_progress pointer to NULL if we
are completing the reset that is in progress.

We also were not aborting queued resets when
destroying a channel so that is fixed here too.

The added unit test covers both fixes above - it will
submit two resets on a different channels, then destroy
the second channel.  This will abort the second reset
and check that the bdev still sees the first reset as in
progress.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I61df677cfa272c589ca03cb81753f71b0807a182

Reviewed-on: https://review.gerrithub.io/378199
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Dariusz Stojaczyk <dariuszx.stojaczyk@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
This commit is contained in:
Jim Harris 2017-09-12 12:14:48 -07:00 committed by Daniel Verkamp
parent ab29d2ce5d
commit 1f935c7a9b
3 changed files with 102 additions and 13 deletions

View File

@ -235,8 +235,8 @@ struct spdk_bdev {
TAILQ_ENTRY(spdk_bdev) link;
/** denotes if a reset is currently in progress on this bdev */
bool reset_in_progress;
/** points to a reset bdev_io if one is in progress. */
struct spdk_bdev_io *reset_in_progress;
};
typedef void (*spdk_bdev_io_get_buf_cb)(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io);

View File

@ -690,8 +690,12 @@ spdk_bdev_channel_create(void *io_device, void *ctx_buf)
return 0;
}
/*
* Abort I/O that are waiting on a data buffer. These types of I/O are
* linked using the spdk_bdev_io buf_link TAILQ_ENTRY.
*/
static void
_spdk_bdev_abort_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
_spdk_bdev_abort_buf_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
{
struct spdk_bdev_io *bdev_io, *tmp;
@ -703,6 +707,23 @@ _spdk_bdev_abort_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
}
}
/*
* Abort I/O that are queued waiting for submission. These types of I/O are
* linked using the spdk_bdev_io link TAILQ_ENTRY.
*/
static void
_spdk_bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
{
struct spdk_bdev_io *bdev_io, *tmp;
TAILQ_FOREACH_SAFE(bdev_io, queue, link, tmp) {
if (bdev_io->ch == ch) {
TAILQ_REMOVE(queue, bdev_io, link);
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
}
}
}
static void
spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
{
@ -711,8 +732,9 @@ spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
mgmt_channel = spdk_io_channel_get_ctx(ch->mgmt_channel);
_spdk_bdev_abort_io(&mgmt_channel->need_buf_small, ch);
_spdk_bdev_abort_io(&mgmt_channel->need_buf_large, ch);
_spdk_bdev_abort_queued_io(&ch->queued_resets, ch);
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, ch);
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, ch);
spdk_put_io_channel(ch->channel);
spdk_put_io_channel(ch->mgmt_channel);
@ -1165,8 +1187,8 @@ _spdk_bdev_reset_abort_channel(void *io_device, struct spdk_io_channel *ch,
channel = spdk_io_channel_get_ctx(ch);
mgmt_channel = spdk_io_channel_get_ctx(channel->mgmt_channel);
_spdk_bdev_abort_io(&mgmt_channel->need_buf_small, channel);
_spdk_bdev_abort_io(&mgmt_channel->need_buf_large, channel);
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, channel);
_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, channel);
}
static void
@ -1186,8 +1208,8 @@ _spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch)
assert(!TAILQ_EMPTY(&ch->queued_resets));
pthread_mutex_lock(&bdev->mutex);
if (!bdev->reset_in_progress) {
bdev->reset_in_progress = true;
if (bdev->reset_in_progress == NULL) {
bdev->reset_in_progress = TAILQ_FIRST(&ch->queued_resets);
/*
* Take a channel reference for the target bdev for the life of this
* reset. This guards against the channel getting destroyed while
@ -1195,7 +1217,7 @@ _spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch)
* progress. We will release the reference when this reset is
* completed.
*/
TAILQ_FIRST(&ch->queued_resets)->u.reset.ch_ref = spdk_get_io_channel(bdev);
bdev->reset_in_progress->u.reset.ch_ref = spdk_get_io_channel(bdev);
_spdk_bdev_start_reset(ch);
}
pthread_mutex_unlock(&bdev->mutex);
@ -1357,7 +1379,9 @@ spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status sta
if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_RESET)) {
pthread_mutex_lock(&bdev_io->bdev->mutex);
bdev_io->bdev->reset_in_progress = false;
if (bdev_io == bdev_io->bdev->reset_in_progress) {
bdev_io->bdev->reset_in_progress = NULL;
}
pthread_mutex_unlock(&bdev_io->bdev->mutex);
if (bdev_io->u.reset.ch_ref != NULL) {
spdk_put_io_channel(bdev_io->u.reset.ch_ref);
@ -1513,7 +1537,7 @@ _spdk_bdev_register(struct spdk_bdev *bdev)
TAILQ_INIT(&bdev->vbdevs);
TAILQ_INIT(&bdev->base_bdevs);
bdev->reset_in_progress = false;
bdev->reset_in_progress = NULL;
spdk_io_device_register(bdev, spdk_bdev_channel_create, spdk_bdev_channel_destroy,
sizeof(struct spdk_bdev_channel));

View File

@ -225,6 +225,70 @@ put_channel_during_reset(void)
teardown_test();
}
static void
aborted_reset_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
enum spdk_bdev_io_status *status = cb_arg;
*status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
spdk_bdev_free_io(bdev_io);
}
static void
aborted_reset(void)
{
struct spdk_io_channel *io_ch[2];
enum spdk_bdev_io_status status1, status2;
setup_test();
set_thread(0);
io_ch[0] = spdk_bdev_get_io_channel(g_desc);
CU_ASSERT(io_ch[0] != NULL);
spdk_bdev_reset(g_desc, io_ch[0], aborted_reset_done, &status1);
poll_threads();
CU_ASSERT(g_bdev.bdev.reset_in_progress != NULL);
/*
* First reset has been submitted on ch0. Now submit a second
* reset on ch1 which will get queued since there is already a
* reset in progress.
*/
set_thread(1);
io_ch[1] = spdk_bdev_get_io_channel(g_desc);
CU_ASSERT(io_ch[1] != NULL);
spdk_bdev_reset(g_desc, io_ch[1], aborted_reset_done, &status2);
poll_threads();
CU_ASSERT(g_bdev.bdev.reset_in_progress != NULL);
/*
* Now destroy ch1. This will abort the queued reset. Check that
* the second reset was completed with failed status. Also check
* that bdev->reset_in_progress != NULL, since the original reset
* has not been completed yet. This ensures that the bdev code is
* correctly noticing that the failed reset is *not* the one that
* had been submitted to the bdev module.
*/
set_thread(1);
spdk_put_io_channel(io_ch[1]);
poll_threads();
CU_ASSERT(status2 == SPDK_BDEV_IO_STATUS_FAILED);
CU_ASSERT(g_bdev.bdev.reset_in_progress != NULL);
/*
* Now complete the first reset, verify that it completed with SUCCESS
* status and that bdev->reset_in_progress is also set back to NULL.
*/
set_thread(0);
spdk_put_io_channel(io_ch[0]);
stub_complete_io();
poll_threads();
CU_ASSERT(status1 == SPDK_BDEV_IO_STATUS_SUCCESS);
CU_ASSERT(g_bdev.bdev.reset_in_progress == NULL);
teardown_test();
}
int
main(int argc, char **argv)
{
@ -243,7 +307,8 @@ main(int argc, char **argv)
if (
CU_add_test(suite, "basic", basic) == NULL ||
CU_add_test(suite, "put_channel_during_reset", put_channel_during_reset) == NULL
CU_add_test(suite, "put_channel_during_reset", put_channel_during_reset) == NULL ||
CU_add_test(suite, "aborted_reset", aborted_reset) == NULL
) {
CU_cleanup_registry();
return CU_get_error();