From 6fdc71ec18b22ffa2a7100a1bd49284bd32a1db8 Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Wed, 6 Jan 2021 08:56:15 +0900 Subject: [PATCH] lib/thread: Defer exiting thread if thread is unregistering io_device Current SPDK thread library has a issue which occurs if there is a race between exiting thread and unregistering io_device. For example, there are two threads. Thread 1 registers a device and thread 2 gets a channel of the device. Then if thread 1 starts exiting and unregisters the device, and then thread 2 puts the channel, thread 2 sends a message to thread 1 to complete releasing the device, thread 1 already moved exited. Hence thread 2 failed to send the message. This patch fixes the race issue. The code is verified by adding a unit test case. In detail, add a count, unregistering_dev, to struct spdk_thread, increment it if a callback is specified to spdk_io_device_unregister(), and then decrement it in _finish_unregister(), and thread_exit() checks if it is zero. The contents of struct spdk_thread is changed but it is not public data structure, and hence suppress it for ABI testing. Signed-off-by: Shuhei Matsumoto Change-Id: Idf5faa55335c3ea89f47ccce32687a6be2e26c68 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5796 Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins Reviewed-by: Reviewed-by: Jim Harris Reviewed-by: Changpeng Liu --- include/spdk_internal/thread.h | 1 + lib/thread/thread.c | 20 ++++- test/make/check_so_deps.sh | 2 + test/unit/lib/thread/thread.c/thread_ut.c | 93 +++++++++++++++++++++++ 4 files changed, 115 insertions(+), 1 deletion(-) diff --git a/include/spdk_internal/thread.h b/include/spdk_internal/thread.h index 9f32898eb..5bab452dc 100644 --- a/include/spdk_internal/thread.h +++ b/include/spdk_internal/thread.h @@ -119,6 +119,7 @@ struct spdk_thread { spdk_msg_fn critical_msg; uint64_t id; enum spdk_thread_state state; + int pending_unregister_count; TAILQ_HEAD(, spdk_io_channel) io_channels; TAILQ_ENTRY(spdk_thread) tailq; diff --git a/lib/thread/thread.c b/lib/thread/thread.c index a336bb44f..08a128404 100644 --- a/lib/thread/thread.c +++ b/lib/thread/thread.c @@ -393,6 +393,13 @@ thread_exit(struct spdk_thread *thread, uint64_t now) return; } + if (thread->pending_unregister_count > 0) { + SPDK_INFOLOG(thread, + "thread %s is still unregistering io_devices\n", + thread->name); + return; + } + exited: thread->state = SPDK_THREAD_STATE_EXITED; } @@ -1394,9 +1401,16 @@ static void _finish_unregister(void *arg) { struct io_device *dev = arg; + struct spdk_thread *thread; + + thread = spdk_get_thread(); + assert(thread == dev->unregister_thread); SPDK_DEBUGLOG(thread, "Finishing unregistration of io_device %s (%p) on thread %s\n", - dev->name, dev->io_device, dev->unregister_thread->name); + dev->name, dev->io_device, thread->name); + + assert(thread->pending_unregister_count > 0); + thread->pending_unregister_count--; dev->unregister_cb(dev->io_device); free(dev); @@ -1463,6 +1477,10 @@ spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregist SPDK_DEBUGLOG(thread, "Unregistering io_device %s (%p) from thread %s\n", dev->name, dev->io_device, thread->name); + if (unregister_cb) { + thread->pending_unregister_count++; + } + if (refcnt > 0) { /* defer deletion */ return; diff --git a/test/make/check_so_deps.sh b/test/make/check_so_deps.sh index af8d8fcfb..122c1e078 100755 --- a/test/make/check_so_deps.sh +++ b/test/make/check_so_deps.sh @@ -58,6 +58,8 @@ function confirm_abi_deps() { name = spdk_env_opts [suppress_type] name = spdk_app_opts +[suppress_type] + name = spdk_thread EOF for object in "$libdir"/libspdk_*.so; do diff --git a/test/unit/lib/thread/thread.c/thread_ut.c b/test/unit/lib/thread/thread.c/thread_ut.c index d577671b8..2afe913b6 100644 --- a/test/unit/lib/thread/thread.c/thread_ut.c +++ b/test/unit/lib/thread/thread.c/thread_ut.c @@ -1237,6 +1237,98 @@ nested_channel(void) CU_ASSERT(TAILQ_EMPTY(&g_threads)); } +static int +create_cb2(void *io_device, void *ctx_buf) +{ + uint64_t *devcnt = (uint64_t *)io_device; + + *devcnt += 1; + + return 0; +} + +static void +destroy_cb2(void *io_device, void *ctx_buf) +{ + uint64_t *devcnt = (uint64_t *)io_device; + + CU_ASSERT(*devcnt > 0); + *devcnt -= 1; +} + +static void +unregister_cb2(void *io_device) +{ + uint64_t *devcnt = (uint64_t *)io_device; + + CU_ASSERT(*devcnt == 0); +} + +static void +device_unregister_and_thread_exit_race(void) +{ + uint64_t device = 0; + struct spdk_io_channel *ch1, *ch2; + struct spdk_thread *thread1, *thread2; + + /* Create two threads and each thread gets a channel from the same device. */ + allocate_threads(2); + set_thread(0); + + thread1 = spdk_get_thread(); + SPDK_CU_ASSERT_FATAL(thread1 != NULL); + + spdk_io_device_register(&device, create_cb2, destroy_cb2, sizeof(uint64_t), NULL); + + ch1 = spdk_get_io_channel(&device); + SPDK_CU_ASSERT_FATAL(ch1 != NULL); + + set_thread(1); + + thread2 = spdk_get_thread(); + SPDK_CU_ASSERT_FATAL(thread2 != NULL); + + ch2 = spdk_get_io_channel(&device); + SPDK_CU_ASSERT_FATAL(ch2 != NULL); + + set_thread(0); + + /* Move thread 0 to the exiting state, but it should keep exiting until two channels + * and a device are released. + */ + spdk_thread_exit(thread1); + poll_thread(0); + + spdk_put_io_channel(ch1); + + spdk_io_device_unregister(&device, unregister_cb2); + poll_thread(0); + + CU_ASSERT(spdk_thread_is_exited(thread1) == false); + + set_thread(1); + + /* Move thread 1 to the exiting state, but it should keep exiting until its channel + * is released. + */ + spdk_thread_exit(thread2); + poll_thread(1); + + CU_ASSERT(spdk_thread_is_exited(thread2) == false); + + spdk_put_io_channel(ch2); + poll_thread(1); + + CU_ASSERT(spdk_thread_is_exited(thread1) == false); + CU_ASSERT(spdk_thread_is_exited(thread2) == true); + + poll_thread(0); + + CU_ASSERT(spdk_thread_is_exited(thread1) == true); + + free_threads(); +} + int main(int argc, char **argv) { @@ -1261,6 +1353,7 @@ main(int argc, char **argv) CU_ADD_TEST(suite, thread_exit_test); CU_ADD_TEST(suite, thread_update_stats_test); CU_ADD_TEST(suite, nested_channel); + CU_ADD_TEST(suite, device_unregister_and_thread_exit_race); CU_basic_set_mode(CU_BRM_VERBOSE); CU_basic_run_tests();