lib/thread: Defer exiting thread if thread is unregistering io_device
Current SPDK thread library has a issue which occurs if there is a race between exiting thread and unregistering io_device. For example, there are two threads. Thread 1 registers a device and thread 2 gets a channel of the device. Then if thread 1 starts exiting and unregisters the device, and then thread 2 puts the channel, thread 2 sends a message to thread 1 to complete releasing the device, thread 1 already moved exited. Hence thread 2 failed to send the message. This patch fixes the race issue. The code is verified by adding a unit test case. In detail, add a count, unregistering_dev, to struct spdk_thread, increment it if a callback is specified to spdk_io_device_unregister(), and then decrement it in _finish_unregister(), and thread_exit() checks if it is zero. The contents of struct spdk_thread is changed but it is not public data structure, and hence suppress it for ABI testing. Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Change-Id: Idf5faa55335c3ea89f47ccce32687a6be2e26c68 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5796 Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: <dongx.yi@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
This commit is contained in:
parent
ad583422ce
commit
6fdc71ec18
@ -119,6 +119,7 @@ struct spdk_thread {
|
||||
spdk_msg_fn critical_msg;
|
||||
uint64_t id;
|
||||
enum spdk_thread_state state;
|
||||
int pending_unregister_count;
|
||||
|
||||
TAILQ_HEAD(, spdk_io_channel) io_channels;
|
||||
TAILQ_ENTRY(spdk_thread) tailq;
|
||||
|
@ -393,6 +393,13 @@ thread_exit(struct spdk_thread *thread, uint64_t now)
|
||||
return;
|
||||
}
|
||||
|
||||
if (thread->pending_unregister_count > 0) {
|
||||
SPDK_INFOLOG(thread,
|
||||
"thread %s is still unregistering io_devices\n",
|
||||
thread->name);
|
||||
return;
|
||||
}
|
||||
|
||||
exited:
|
||||
thread->state = SPDK_THREAD_STATE_EXITED;
|
||||
}
|
||||
@ -1394,9 +1401,16 @@ static void
|
||||
_finish_unregister(void *arg)
|
||||
{
|
||||
struct io_device *dev = arg;
|
||||
struct spdk_thread *thread;
|
||||
|
||||
thread = spdk_get_thread();
|
||||
assert(thread == dev->unregister_thread);
|
||||
|
||||
SPDK_DEBUGLOG(thread, "Finishing unregistration of io_device %s (%p) on thread %s\n",
|
||||
dev->name, dev->io_device, dev->unregister_thread->name);
|
||||
dev->name, dev->io_device, thread->name);
|
||||
|
||||
assert(thread->pending_unregister_count > 0);
|
||||
thread->pending_unregister_count--;
|
||||
|
||||
dev->unregister_cb(dev->io_device);
|
||||
free(dev);
|
||||
@ -1463,6 +1477,10 @@ spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregist
|
||||
SPDK_DEBUGLOG(thread, "Unregistering io_device %s (%p) from thread %s\n",
|
||||
dev->name, dev->io_device, thread->name);
|
||||
|
||||
if (unregister_cb) {
|
||||
thread->pending_unregister_count++;
|
||||
}
|
||||
|
||||
if (refcnt > 0) {
|
||||
/* defer deletion */
|
||||
return;
|
||||
|
@ -58,6 +58,8 @@ function confirm_abi_deps() {
|
||||
name = spdk_env_opts
|
||||
[suppress_type]
|
||||
name = spdk_app_opts
|
||||
[suppress_type]
|
||||
name = spdk_thread
|
||||
EOF
|
||||
|
||||
for object in "$libdir"/libspdk_*.so; do
|
||||
|
@ -1237,6 +1237,98 @@ nested_channel(void)
|
||||
CU_ASSERT(TAILQ_EMPTY(&g_threads));
|
||||
}
|
||||
|
||||
static int
|
||||
create_cb2(void *io_device, void *ctx_buf)
|
||||
{
|
||||
uint64_t *devcnt = (uint64_t *)io_device;
|
||||
|
||||
*devcnt += 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
destroy_cb2(void *io_device, void *ctx_buf)
|
||||
{
|
||||
uint64_t *devcnt = (uint64_t *)io_device;
|
||||
|
||||
CU_ASSERT(*devcnt > 0);
|
||||
*devcnt -= 1;
|
||||
}
|
||||
|
||||
static void
|
||||
unregister_cb2(void *io_device)
|
||||
{
|
||||
uint64_t *devcnt = (uint64_t *)io_device;
|
||||
|
||||
CU_ASSERT(*devcnt == 0);
|
||||
}
|
||||
|
||||
static void
|
||||
device_unregister_and_thread_exit_race(void)
|
||||
{
|
||||
uint64_t device = 0;
|
||||
struct spdk_io_channel *ch1, *ch2;
|
||||
struct spdk_thread *thread1, *thread2;
|
||||
|
||||
/* Create two threads and each thread gets a channel from the same device. */
|
||||
allocate_threads(2);
|
||||
set_thread(0);
|
||||
|
||||
thread1 = spdk_get_thread();
|
||||
SPDK_CU_ASSERT_FATAL(thread1 != NULL);
|
||||
|
||||
spdk_io_device_register(&device, create_cb2, destroy_cb2, sizeof(uint64_t), NULL);
|
||||
|
||||
ch1 = spdk_get_io_channel(&device);
|
||||
SPDK_CU_ASSERT_FATAL(ch1 != NULL);
|
||||
|
||||
set_thread(1);
|
||||
|
||||
thread2 = spdk_get_thread();
|
||||
SPDK_CU_ASSERT_FATAL(thread2 != NULL);
|
||||
|
||||
ch2 = spdk_get_io_channel(&device);
|
||||
SPDK_CU_ASSERT_FATAL(ch2 != NULL);
|
||||
|
||||
set_thread(0);
|
||||
|
||||
/* Move thread 0 to the exiting state, but it should keep exiting until two channels
|
||||
* and a device are released.
|
||||
*/
|
||||
spdk_thread_exit(thread1);
|
||||
poll_thread(0);
|
||||
|
||||
spdk_put_io_channel(ch1);
|
||||
|
||||
spdk_io_device_unregister(&device, unregister_cb2);
|
||||
poll_thread(0);
|
||||
|
||||
CU_ASSERT(spdk_thread_is_exited(thread1) == false);
|
||||
|
||||
set_thread(1);
|
||||
|
||||
/* Move thread 1 to the exiting state, but it should keep exiting until its channel
|
||||
* is released.
|
||||
*/
|
||||
spdk_thread_exit(thread2);
|
||||
poll_thread(1);
|
||||
|
||||
CU_ASSERT(spdk_thread_is_exited(thread2) == false);
|
||||
|
||||
spdk_put_io_channel(ch2);
|
||||
poll_thread(1);
|
||||
|
||||
CU_ASSERT(spdk_thread_is_exited(thread1) == false);
|
||||
CU_ASSERT(spdk_thread_is_exited(thread2) == true);
|
||||
|
||||
poll_thread(0);
|
||||
|
||||
CU_ASSERT(spdk_thread_is_exited(thread1) == true);
|
||||
|
||||
free_threads();
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
@ -1261,6 +1353,7 @@ main(int argc, char **argv)
|
||||
CU_ADD_TEST(suite, thread_exit_test);
|
||||
CU_ADD_TEST(suite, thread_update_stats_test);
|
||||
CU_ADD_TEST(suite, nested_channel);
|
||||
CU_ADD_TEST(suite, device_unregister_and_thread_exit_race);
|
||||
|
||||
CU_basic_set_mode(CU_BRM_VERBOSE);
|
||||
CU_basic_run_tests();
|
||||
|
Loading…
Reference in New Issue
Block a user