From b5075dcc5b930285caacd6ff608e62a36b9c4d96 Mon Sep 17 00:00:00 2001 From: Mike Gerdts Date: Tue, 25 Oct 2022 08:01:23 -0500 Subject: [PATCH] bdev: action_in_progress counting is racy Since bdev_examine() can happen on any thread and it happens without any other lock being held on the spdk_bdev_module, it is possible for multiple threads to try to simultaneously increment module->internal.action_in_progress. Decrements may also race. This commit adds bdev_module->internal.spinlock and holds it while modifying module->internal.action_in_progress. This can be removed when the bdev_register_examine_thread deprecation is removed. Change-Id: I9c401eeb3c7c97c484e16fa9cfd82668b32e508b Signed-off-by: Mike Gerdts Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15281 Tested-by: SPDK CI Jenkins Reviewed-by: Shuhei Matsumoto Reviewed-by: Jim Harris Community-CI: Mellanox Build Bot --- include/spdk/bdev_module.h | 14 +++++++++++++- lib/bdev/bdev.c | 26 ++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h index 58c1b17a5..85e39becb 100644 --- a/include/spdk/bdev_module.h +++ b/include/spdk/bdev_module.h @@ -144,6 +144,11 @@ struct spdk_bdev_module { * must not read or write to these fields. */ struct __bdev_module_internal_fields { + /** + * Protects action_in_progress. Take no locks while holding this one. + */ + struct spdk_spinlock spinlock; + /** * Count of bdev inits/examinations in progress. Used by generic bdev * layer and must not be modified by bdev modules. @@ -504,7 +509,14 @@ struct spdk_bdev { /** True if the state of the QoS is being modified */ bool qos_mod_in_progress; - /** Spin lock protecting claimed */ + /** + * SPDK spinlock protecting many of the internal fields of this structure. If + * multiple locks need to be held, the following order must be used: + * g_bdev_mgr.spinlock + * bdev->internal.spinlock + * bdev_desc->spinlock + * bdev_module->internal.spinlock + */ struct spdk_spinlock spinlock; /** The bdev status */ diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 58f2666cd..3ac9a9745 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -648,8 +648,10 @@ bdev_examine(struct spdk_bdev *bdev) TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) { if (module->examine_config && bdev_ok_to_examine(bdev)) { + spdk_spin_lock(&module->internal.spinlock); action = module->internal.action_in_progress; module->internal.action_in_progress++; + spdk_spin_unlock(&module->internal.spinlock); module->examine_config(bdev); if (action != module->internal.action_in_progress) { SPDK_ERRLOG("examine_config for module %s did not call spdk_bdev_module_examine_done()\n", @@ -658,17 +660,22 @@ bdev_examine(struct spdk_bdev *bdev) } } - if (bdev->internal.claim_module && bdev_ok_to_examine(bdev)) { - if (bdev->internal.claim_module->examine_disk) { - bdev->internal.claim_module->internal.action_in_progress++; - bdev->internal.claim_module->examine_disk(bdev); + module = bdev->internal.claim_module; + if (module != NULL && bdev_ok_to_examine(bdev)) { + if (module->examine_disk) { + spdk_spin_lock(&module->internal.spinlock); + module->internal.action_in_progress++; + spdk_spin_unlock(&module->internal.spinlock); + module->examine_disk(bdev); } return; } TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) { if (module->examine_disk && bdev_ok_to_examine(bdev)) { + spdk_spin_lock(&module->internal.spinlock); module->internal.action_in_progress++; + spdk_spin_unlock(&module->internal.spinlock); module->examine_disk(bdev); } } @@ -1562,8 +1569,10 @@ bdev_module_action_complete(void) static void bdev_module_action_done(struct spdk_bdev_module *module) { + spdk_spin_lock(&module->internal.spinlock); assert(module->internal.action_in_progress > 0); module->internal.action_in_progress--; + spdk_spin_unlock(&module->internal.spinlock); bdev_module_action_complete(); } @@ -1587,7 +1596,10 @@ bdev_init_failed(void *cb_arg) { struct spdk_bdev_module *module = cb_arg; + spdk_spin_lock(&module->internal.spinlock); + assert(module->internal.action_in_progress > 0); module->internal.action_in_progress--; + spdk_spin_unlock(&module->internal.spinlock); bdev_init_complete(-1); } @@ -1600,13 +1612,17 @@ bdev_modules_init(void) TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) { g_resume_bdev_module = module; if (module->async_init) { + spdk_spin_lock(&module->internal.spinlock); module->internal.action_in_progress = 1; + spdk_spin_unlock(&module->internal.spinlock); } rc = module->module_init(); if (rc != 0) { /* Bump action_in_progress to prevent other modules from completion of modules_init * Send message to defer application shutdown until resources are cleaned up */ + spdk_spin_lock(&module->internal.spinlock); module->internal.action_in_progress = 1; + spdk_spin_unlock(&module->internal.spinlock); spdk_thread_send_msg(spdk_get_thread(), bdev_init_failed, module); return rc; } @@ -7446,6 +7462,8 @@ spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module) assert(false); } + spdk_spin_init(&bdev_module->internal.spinlock); + /* * Modules with examine callbacks must be initialized first, so they are * ready to handle examine callbacks from later modules that will