lib/thread: Defer exiting thread if thread is unregistering io_device

Current SPDK thread library has a issue which occurs if there is
a race between exiting thread and unregistering io_device.

For example, there are two threads. Thread 1 registers a device
and thread 2 gets a channel of the device. Then if thread 1 starts
exiting and unregisters the device, and then thread 2 puts the channel,
thread 2 sends a message to thread 1 to complete releasing the device,
thread 1 already moved exited. Hence thread 2 failed to send the
message.

This patch fixes the race issue. The code is verified by adding
a unit test case.

In detail, add a count, unregistering_dev, to struct spdk_thread,
increment it if a callback is specified to spdk_io_device_unregister(),
and then decrement it in _finish_unregister(), and thread_exit()
checks if it is zero.

The contents of struct spdk_thread is changed but it is not public
data structure, and hence suppress it for ABI testing.

Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: Idf5faa55335c3ea89f47ccce32687a6be2e26c68
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5796
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: <dongx.yi@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
This commit is contained in:
Shuhei Matsumoto 2021-01-06 08:56:15 +09:00 committed by Tomasz Zawadzki
parent ad583422ce
commit 6fdc71ec18
4 changed files with 115 additions and 1 deletions

View File

@ -119,6 +119,7 @@ struct spdk_thread {
spdk_msg_fn critical_msg;
uint64_t id;
enum spdk_thread_state state;
int pending_unregister_count;
TAILQ_HEAD(, spdk_io_channel) io_channels;
TAILQ_ENTRY(spdk_thread) tailq;

View File

@ -393,6 +393,13 @@ thread_exit(struct spdk_thread *thread, uint64_t now)
return;
}
if (thread->pending_unregister_count > 0) {
SPDK_INFOLOG(thread,
"thread %s is still unregistering io_devices\n",
thread->name);
return;
}
exited:
thread->state = SPDK_THREAD_STATE_EXITED;
}
@ -1394,9 +1401,16 @@ static void
_finish_unregister(void *arg)
{
struct io_device *dev = arg;
struct spdk_thread *thread;
thread = spdk_get_thread();
assert(thread == dev->unregister_thread);
SPDK_DEBUGLOG(thread, "Finishing unregistration of io_device %s (%p) on thread %s\n",
dev->name, dev->io_device, dev->unregister_thread->name);
dev->name, dev->io_device, thread->name);
assert(thread->pending_unregister_count > 0);
thread->pending_unregister_count--;
dev->unregister_cb(dev->io_device);
free(dev);
@ -1463,6 +1477,10 @@ spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregist
SPDK_DEBUGLOG(thread, "Unregistering io_device %s (%p) from thread %s\n",
dev->name, dev->io_device, thread->name);
if (unregister_cb) {
thread->pending_unregister_count++;
}
if (refcnt > 0) {
/* defer deletion */
return;

View File

@ -58,6 +58,8 @@ function confirm_abi_deps() {
name = spdk_env_opts
[suppress_type]
name = spdk_app_opts
[suppress_type]
name = spdk_thread
EOF
for object in "$libdir"/libspdk_*.so; do

View File

@ -1237,6 +1237,98 @@ nested_channel(void)
CU_ASSERT(TAILQ_EMPTY(&g_threads));
}
static int
create_cb2(void *io_device, void *ctx_buf)
{
uint64_t *devcnt = (uint64_t *)io_device;
*devcnt += 1;
return 0;
}
static void
destroy_cb2(void *io_device, void *ctx_buf)
{
uint64_t *devcnt = (uint64_t *)io_device;
CU_ASSERT(*devcnt > 0);
*devcnt -= 1;
}
static void
unregister_cb2(void *io_device)
{
uint64_t *devcnt = (uint64_t *)io_device;
CU_ASSERT(*devcnt == 0);
}
static void
device_unregister_and_thread_exit_race(void)
{
uint64_t device = 0;
struct spdk_io_channel *ch1, *ch2;
struct spdk_thread *thread1, *thread2;
/* Create two threads and each thread gets a channel from the same device. */
allocate_threads(2);
set_thread(0);
thread1 = spdk_get_thread();
SPDK_CU_ASSERT_FATAL(thread1 != NULL);
spdk_io_device_register(&device, create_cb2, destroy_cb2, sizeof(uint64_t), NULL);
ch1 = spdk_get_io_channel(&device);
SPDK_CU_ASSERT_FATAL(ch1 != NULL);
set_thread(1);
thread2 = spdk_get_thread();
SPDK_CU_ASSERT_FATAL(thread2 != NULL);
ch2 = spdk_get_io_channel(&device);
SPDK_CU_ASSERT_FATAL(ch2 != NULL);
set_thread(0);
/* Move thread 0 to the exiting state, but it should keep exiting until two channels
* and a device are released.
*/
spdk_thread_exit(thread1);
poll_thread(0);
spdk_put_io_channel(ch1);
spdk_io_device_unregister(&device, unregister_cb2);
poll_thread(0);
CU_ASSERT(spdk_thread_is_exited(thread1) == false);
set_thread(1);
/* Move thread 1 to the exiting state, but it should keep exiting until its channel
* is released.
*/
spdk_thread_exit(thread2);
poll_thread(1);
CU_ASSERT(spdk_thread_is_exited(thread2) == false);
spdk_put_io_channel(ch2);
poll_thread(1);
CU_ASSERT(spdk_thread_is_exited(thread1) == false);
CU_ASSERT(spdk_thread_is_exited(thread2) == true);
poll_thread(0);
CU_ASSERT(spdk_thread_is_exited(thread1) == true);
free_threads();
}
int
main(int argc, char **argv)
{
@ -1261,6 +1353,7 @@ main(int argc, char **argv)
CU_ADD_TEST(suite, thread_exit_test);
CU_ADD_TEST(suite, thread_update_stats_test);
CU_ADD_TEST(suite, nested_channel);
CU_ADD_TEST(suite, device_unregister_and_thread_exit_race);
CU_basic_set_mode(CU_BRM_VERBOSE);
CU_basic_run_tests();