thread: defer unregistration when for_each ops exist

There may be for_each operations outstanding on an
io_device when it is unregistered.  Currently we just
return when this happens, not unregistering the
device but also not notifying the caller that this
happened (since it returns void, and the callback
function doesn't have a status parameter either).

We could just push this responsibility to the caller,
to never unregister an io_device if it knows it has
outstanding for_each calls waiting to complete.  But
I think we can simplify this a lot by just handling
this inside of the thread library.  Mark that the
device is pending registration, and unregister it
(on the original requesting thread!) when the
for_each count gets back to zero.  Also don't
allow any new for_each operations either.

Note this requires a bit of refactoring on the
thread unit tests, since it is now possible to
unregister a device with outstanding for_each
operations.

Fixes issue #2631.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I978f2d99a25e65d2b7d71ce9b1926a79a6c94263
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13890
Community-CI: Mellanox Build Bot
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
This commit is contained in:
Jim Harris 2022-08-04 20:50:01 +00:00 committed by Tomasz Zawadzki
parent 821e673c1d
commit d33497d3f4
2 changed files with 61 additions and 16 deletions

View File

@ -161,6 +161,7 @@ struct io_device {
uint32_t refcnt;
bool pending_unregister;
bool unregistered;
};
@ -2044,18 +2045,31 @@ spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregist
return;
}
if (dev->for_each_count > 0) {
SPDK_ERRLOG("io_device %s (%p) has %u for_each calls outstanding\n",
dev->name, io_device, dev->for_each_count);
/* The for_each_count check differentiates the user attempting to unregister the
* device a second time, from the internal call to this function that occurs
* after the for_each_count reaches 0.
*/
if (dev->pending_unregister && dev->for_each_count > 0) {
SPDK_ERRLOG("io_device %p already has a pending unregister\n", io_device);
assert(false);
pthread_mutex_unlock(&g_devlist_mutex);
return;
}
dev->unregister_cb = unregister_cb;
dev->unregister_thread = thread;
if (dev->for_each_count > 0) {
SPDK_WARNLOG("io_device %s (%p) has %u for_each calls outstanding\n",
dev->name, io_device, dev->for_each_count);
dev->pending_unregister = true;
pthread_mutex_unlock(&g_devlist_mutex);
return;
}
dev->unregistered = true;
RB_REMOVE(io_device_tree, &g_io_devices, dev);
refcnt = dev->refcnt;
dev->unregister_thread = thread;
pthread_mutex_unlock(&g_devlist_mutex);
SPDK_DEBUGLOG(thread, "Unregistering io_device %s (%p) from thread %s\n",
@ -2385,6 +2399,15 @@ spdk_for_each_channel(void *io_device, spdk_channel_msg fn, void *ctx,
goto end;
}
/* Do not allow new for_each operations if we are already waiting to unregister
* the device for other for_each operations to complete.
*/
if (i->dev->pending_unregister) {
SPDK_ERRLOG("io_device %p has a pending unregister\n", io_device);
i->status = -ENODEV;
goto end;
}
TAILQ_FOREACH(thread, &g_threads, tailq) {
ch = thread_get_io_channel(thread, i->dev);
if (ch != NULL) {
@ -2405,11 +2428,22 @@ end:
assert(rc == 0);
}
static void
__pending_unregister(void *arg)
{
struct io_device *dev = arg;
assert(dev->pending_unregister);
assert(dev->for_each_count == 0);
spdk_io_device_unregister(dev->io_device, dev->unregister_cb);
}
void
spdk_for_each_channel_continue(struct spdk_io_channel_iter *i, int status)
{
struct spdk_thread *thread;
struct spdk_io_channel *ch;
struct io_device *dev;
int rc __attribute__((unused));
assert(i->cur_thread == spdk_get_thread());
@ -2417,12 +2451,14 @@ spdk_for_each_channel_continue(struct spdk_io_channel_iter *i, int status)
i->status = status;
pthread_mutex_lock(&g_devlist_mutex);
dev = i->dev;
if (status) {
goto end;
}
thread = TAILQ_NEXT(i->cur_thread, tailq);
while (thread) {
ch = thread_get_io_channel(thread, i->dev);
ch = thread_get_io_channel(thread, dev);
if (ch != NULL) {
i->cur_thread = thread;
i->ch = ch;
@ -2435,12 +2471,19 @@ spdk_for_each_channel_continue(struct spdk_io_channel_iter *i, int status)
}
end:
i->dev->for_each_count--;
dev->for_each_count--;
i->ch = NULL;
pthread_mutex_unlock(&g_devlist_mutex);
rc = spdk_thread_send_msg(i->orig_thread, _call_completion, i);
assert(rc == 0);
pthread_mutex_lock(&g_devlist_mutex);
if (dev->pending_unregister && dev->for_each_count == 0) {
rc = spdk_thread_send_msg(dev->unregister_thread, __pending_unregister, dev);
assert(rc == 0);
}
pthread_mutex_unlock(&g_devlist_mutex);
}
struct spdk_interrupt {

View File

@ -624,15 +624,9 @@ for_each_channel_unreg(void)
SPDK_CU_ASSERT_FATAL(dev != NULL);
CU_ASSERT(RB_NEXT(io_device_tree, &g_io_devices, dev) == NULL);
ch0 = spdk_get_io_channel(&io_target);
spdk_for_each_channel(&io_target, unreg_ch_done, &ctx, unreg_foreach_done);
spdk_io_device_unregister(&io_target, NULL);
/*
* There is an outstanding foreach call on the io_device, so the unregister should not
* have removed the device.
*/
CU_ASSERT(dev == RB_MIN(io_device_tree, &g_io_devices));
spdk_io_device_register(&io_target, channel_create, channel_destroy, sizeof(int), NULL);
/*
* There is already a device registered at &io_target, so a new io_device should not
* have been added to g_io_devices.
@ -640,14 +634,22 @@ for_each_channel_unreg(void)
CU_ASSERT(dev == RB_MIN(io_device_tree, &g_io_devices));
CU_ASSERT(RB_NEXT(io_device_tree, &g_io_devices, dev) == NULL);
spdk_for_each_channel(&io_target, unreg_ch_done, &ctx, unreg_foreach_done);
spdk_io_device_unregister(&io_target, NULL);
/*
* There is an outstanding foreach call on the io_device, so the unregister should not
* have immediately removed the device.
*/
CU_ASSERT(dev == RB_MIN(io_device_tree, &g_io_devices));
poll_thread(0);
CU_ASSERT(ctx.ch_done == true);
CU_ASSERT(ctx.foreach_done == true);
/*
* There are no more foreach operations outstanding, so we can unregister the device,
* even though a channel still exists for the device.
* There are no more foreach operations outstanding, so the device should be
* unregistered.
*/
spdk_io_device_unregister(&io_target, NULL);
CU_ASSERT(RB_EMPTY(&g_io_devices));
set_thread(0);