diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e0cc48f7..0fb57793f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,14 @@ a null name parameter and will return the only available target. The majority of the NVMe-oF RPCs now accept an optional tgt_name parameter. This will allow those RPCs to work with applications that create more than one target. +### bdev + +A new spdk_bdev_open_ext function has been added and spdk_bdev_open function has been deprecated. +The new open function introduces requirement to provide callback function that will be called by +asynchronous event such as bdev removal. spdk_bdev_open_ext function takes bdev name as +an argument instead of bdev structure to avoid a race condition that can happen when the bdev +is being removed between a call to get its structure based on a name and actually openning it. + ### nvme Added `no_shn_notification` to NVMe controller initialization options, users can enable diff --git a/doc/bdev_pg.md b/doc/bdev_pg.md index 66929c617..67261c2c6 100644 --- a/doc/bdev_pg.md +++ b/doc/bdev_pg.md @@ -72,7 +72,7 @@ name to look up the block device. ## Preparing To Use A Block Device In order to send I/O requests to a block device, it must first be opened by -calling spdk_bdev_open(). This will return a descriptor. Multiple users may have +calling spdk_bdev_open_ext(). This will return a descriptor. Multiple users may have a bdev open at the same time, and coordination of reads and writes between users must be handled by some higher level mechanism outside of the bdev layer. Opening a bdev with write permission may fail if a virtual bdev module @@ -81,13 +81,14 @@ logical volume management and forward their I/O to lower level bdevs, so they mark these lower level bdevs as claimed to prevent outside users from issuing writes. -When a block device is opened, an optional callback and context can be -provided that will be called if the underlying storage servicing the block -device is removed. For example, the remove callback will be called on each -open descriptor for a bdev backed by a physical NVMe SSD when the NVMe SSD is -hot-unplugged. The callback can be thought of as a request to close the open -descriptor so other memory may be freed. A bdev cannot be torn down while open -descriptors exist, so it is highly recommended that a callback is provided. +When a block device is opened, a callback and context must be provided that +will be called with appropriate spdk_bdev_event_type enum as an argument when +the bdev triggers asynchronous event such as bdev removal. For example, +the callback will be called on each open descriptor for a bdev backed by +a physical NVMe SSD when the NVMe SSD is hot-unplugged. In this case +the callback can be thought of as a request to close the open descriptor so +other memory may be freed. A bdev cannot be torn down while open descriptors +exist, so it is required that a callback is provided. When a user is done with a descriptor, they may release it by calling spdk_bdev_close(). diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index 3978ea42d..cd55e63d5 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -60,6 +60,11 @@ extern "C" { */ #define SPDK_BDEV_BUF_SIZE_WITH_MD(x) (((x) / 512) * (512 + 16)) +/** Asynchronous event type */ +enum spdk_bdev_event_type { + SPDK_BDEV_EVENT_REMOVE, +}; + /** * \brief SPDK block device. * @@ -74,6 +79,16 @@ struct spdk_bdev; */ typedef void (*spdk_bdev_remove_cb_t)(void *remove_ctx); +/** + * Block device event callback. + * + * \param event Event details. + * \param bdev Block device that triggered event. + * \param event_ctx Context for the block device event. + */ +typedef void (*spdk_bdev_event_cb_t)(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, + void *event_ctx); + /** * Block device I/O * @@ -256,7 +271,7 @@ struct spdk_bdev *spdk_bdev_first_leaf(void); struct spdk_bdev *spdk_bdev_next_leaf(struct spdk_bdev *prev); /** - * Open a block device for I/O operations. + * Open a block device for I/O operations (deprecated, please use spdk_bdev_open_ext). * * \param bdev Block device to open. * \param write true is read/write access requested, false if read-only @@ -272,6 +287,23 @@ struct spdk_bdev *spdk_bdev_next_leaf(struct spdk_bdev *prev); int spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb, void *remove_ctx, struct spdk_bdev_desc **desc); +/** + * Open a block device for I/O operations. + * + * \param bdev_name Block device name to open. + * \param write true is read/write access requested, false if read-only + * \param event_cb notification callback to be called when the bdev triggers + * asynchronous event such as bdev removal. This will always be called on the + * same thread that spdk_bdev_open() was called on. In case of removal event + * the descriptor will have to be manually closed to make the bdev unregister + * proceed. + * \param event_ctx param for event_cb. + * \param desc output parameter for the descriptor when operation is successful + * \return 0 if operation is successful, suitable errno value otherwise + */ +int spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb, + void *event_ctx, struct spdk_bdev_desc **desc); + /** * Close a previously opened block device. * diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 935505c37..193b3dc5f 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -273,8 +273,14 @@ struct spdk_bdev_channel { struct spdk_bdev_desc { struct spdk_bdev *bdev; struct spdk_thread *thread; - spdk_bdev_remove_cb_t remove_cb; - void *remove_ctx; + struct { + bool open_with_ext; + union { + spdk_bdev_remove_cb_t remove_fn; + spdk_bdev_event_cb_t event_fn; + }; + void *ctx; + } callback; bool remove_scheduled; bool closed; bool write; @@ -4194,7 +4200,11 @@ _remove_notify(void *arg) if (desc->closed) { free(desc); } else { - desc->remove_cb(desc->remove_ctx); + if (desc->callback.open_with_ext) { + desc->callback.event_fn(SPDK_BDEV_EVENT_REMOVE, desc->bdev, desc->callback.ctx); + } else { + desc->callback.remove_fn(desc->callback.ctx); + } } } @@ -4210,18 +4220,16 @@ spdk_bdev_unregister_unsafe(struct spdk_bdev *bdev) /* Notify each descriptor about hotremoval */ TAILQ_FOREACH_SAFE(desc, &bdev->internal.open_descs, link, tmp) { rc = -EBUSY; - if (desc->remove_cb) { - /* - * Defer invocation of the remove_cb to a separate message that will - * run later on its thread. This ensures this context unwinds and - * we don't recursively unregister this bdev again if the remove_cb - * immediately closes its descriptor. - */ - if (!desc->remove_scheduled) { - /* Avoid scheduling removal of the same descriptor multiple times. */ - desc->remove_scheduled = true; - spdk_thread_send_msg(desc->thread, _remove_notify, desc); - } + /* + * Defer invocation of the event_cb to a separate message that will + * run later on its thread. This ensures this context unwinds and + * we don't recursively unregister this bdev again if the event_cb + * immediately closes its descriptor. + */ + if (!desc->remove_scheduled) { + /* Avoid scheduling removal of the same descriptor multiple times. */ + desc->remove_scheduled = true; + spdk_thread_send_msg(desc->thread, _remove_notify, desc); } } @@ -4353,8 +4361,11 @@ spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_ remove_cb = _spdk_bdev_dummy_event_cb; } - desc->remove_cb = remove_cb; - desc->remove_ctx = remove_ctx; + desc->callback.open_with_ext = false; + desc->callback.remove_fn = remove_cb; + desc->callback.ctx = remove_ctx; + + pthread_mutex_lock(&g_bdev_mgr.mutex); rc = _spdk_bdev_open(bdev, write, desc); if (rc != 0) { @@ -4364,6 +4375,55 @@ spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_ *_desc = desc; + pthread_mutex_unlock(&g_bdev_mgr.mutex); + + return rc; +} + +int +spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb, + void *event_ctx, struct spdk_bdev_desc **_desc) +{ + struct spdk_bdev_desc *desc; + struct spdk_bdev *bdev; + int rc; + + if (event_cb == NULL) { + SPDK_ERRLOG("Missing event callback function\n"); + return -EINVAL; + } + + pthread_mutex_lock(&g_bdev_mgr.mutex); + + bdev = spdk_bdev_get_by_name(bdev_name); + + if (bdev == NULL) { + SPDK_ERRLOG("Failed to find bdev with name: %s\n", bdev_name); + pthread_mutex_unlock(&g_bdev_mgr.mutex); + return -EINVAL; + } + + desc = calloc(1, sizeof(*desc)); + if (desc == NULL) { + SPDK_ERRLOG("Failed to allocate memory for bdev descriptor\n"); + pthread_mutex_unlock(&g_bdev_mgr.mutex); + return -ENOMEM; + } + + desc->callback.open_with_ext = true; + desc->callback.event_fn = event_cb; + desc->callback.ctx = event_ctx; + + rc = _spdk_bdev_open(bdev, write, desc); + if (rc != 0) { + free(desc); + desc = NULL; + } + + *_desc = desc; + + pthread_mutex_unlock(&g_bdev_mgr.mutex); + return rc; } diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index 4ad38f07f..3964c96cc 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -64,6 +64,8 @@ DEFINE_STUB(spdk_notify_type_register, struct spdk_notify_type *, (const char *t int g_status; int g_count; +enum spdk_bdev_event_type g_event_type1; +enum spdk_bdev_event_type g_event_type2; struct spdk_histogram_data *g_histogram; void @@ -1998,6 +2000,58 @@ bdev_open_while_hotremove(void) free_bdev(bdev); } +static void +bdev_open_cb1(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) +{ + struct spdk_bdev_desc *desc = *(struct spdk_bdev_desc **)event_ctx; + + g_event_type1 = type; + spdk_bdev_close(desc); +} + +static void +bdev_open_cb2(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) +{ + struct spdk_bdev_desc *desc = *(struct spdk_bdev_desc **)event_ctx; + + g_event_type2 = type; + spdk_bdev_close(desc); +} + +static void +bdev_open_ext(void) +{ + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc1 = NULL; + struct spdk_bdev_desc *desc2 = NULL; + int rc = 0; + + bdev = allocate_bdev("bdev"); + + rc = spdk_bdev_open_ext("bdev", true, NULL, NULL, &desc1); + CU_ASSERT_EQUAL(rc, -EINVAL); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc1, &desc1); + CU_ASSERT_EQUAL(rc, 0); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb2, &desc2, &desc2); + CU_ASSERT_EQUAL(rc, 0); + + g_event_type1 = 0xFF; + g_event_type2 = 0xFF; + + /* Simulate hot-unplug by unregistering bdev */ + spdk_bdev_unregister(bdev, NULL, NULL); + poll_threads(); + + /* Check if correct events have been triggered in event callback fn */ + CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_REMOVE); + CU_ASSERT_EQUAL(g_event_type2, SPDK_BDEV_EVENT_REMOVE); + + free_bdev(bdev); + poll_threads(); +} + int main(int argc, char **argv) { @@ -2030,7 +2084,8 @@ main(int argc, char **argv) CU_add_test(suite, "bdev_io_alignment", bdev_io_alignment) == NULL || CU_add_test(suite, "bdev_histograms", bdev_histograms) == NULL || CU_add_test(suite, "bdev_write_zeroes", bdev_write_zeroes) == NULL || - CU_add_test(suite, "bdev_open_while_hotremove", bdev_open_while_hotremove) == NULL + CU_add_test(suite, "bdev_open_while_hotremove", bdev_open_while_hotremove) == NULL || + CU_add_test(suite, "bdev_open_ext", bdev_open_ext) == NULL ) { CU_cleanup_registry(); return CU_get_error();