From 79ed1ba18dba76178c74f326c7fefe07dec5bfbb Mon Sep 17 00:00:00 2001 From: Maciej Szwed Date: Fri, 24 May 2019 11:06:06 +0200 Subject: [PATCH] bdev: Add spdk_bdev_open_ext function This patch adds new interface for opening bdev and implements new style remove event. With that changes user can be notified about different types of events that occur in regards to bdev. spdk_bdev_open_ext function uses bdev name as an argument instead of bdev structure to remove race condition where user gets the bdev structure and bdev is removed after getting that structure and before open function is called. spdk_bdev_open is now deprecated. Signed-off-by: Maciej Szwed Change-Id: I44ebeb988bc6a2f441fc6a0c38a30668aad999ad Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455647 Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Broadcom SPDK FC-NVMe CI Reviewed-by: Shuhei Matsumoto --- CHANGELOG.md | 8 +++ doc/bdev_pg.md | 17 +++--- include/spdk/bdev.h | 34 ++++++++++- lib/bdev/bdev.c | 94 +++++++++++++++++++++++------ test/unit/lib/bdev/bdev.c/bdev_ut.c | 57 ++++++++++++++++- 5 files changed, 183 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e0cc48f7..0fb57793f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,14 @@ a null name parameter and will return the only available target. The majority of the NVMe-oF RPCs now accept an optional tgt_name parameter. This will allow those RPCs to work with applications that create more than one target. +### bdev + +A new spdk_bdev_open_ext function has been added and spdk_bdev_open function has been deprecated. +The new open function introduces requirement to provide callback function that will be called by +asynchronous event such as bdev removal. spdk_bdev_open_ext function takes bdev name as +an argument instead of bdev structure to avoid a race condition that can happen when the bdev +is being removed between a call to get its structure based on a name and actually openning it. + ### nvme Added `no_shn_notification` to NVMe controller initialization options, users can enable diff --git a/doc/bdev_pg.md b/doc/bdev_pg.md index 66929c617..67261c2c6 100644 --- a/doc/bdev_pg.md +++ b/doc/bdev_pg.md @@ -72,7 +72,7 @@ name to look up the block device. ## Preparing To Use A Block Device In order to send I/O requests to a block device, it must first be opened by -calling spdk_bdev_open(). This will return a descriptor. Multiple users may have +calling spdk_bdev_open_ext(). This will return a descriptor. Multiple users may have a bdev open at the same time, and coordination of reads and writes between users must be handled by some higher level mechanism outside of the bdev layer. Opening a bdev with write permission may fail if a virtual bdev module @@ -81,13 +81,14 @@ logical volume management and forward their I/O to lower level bdevs, so they mark these lower level bdevs as claimed to prevent outside users from issuing writes. -When a block device is opened, an optional callback and context can be -provided that will be called if the underlying storage servicing the block -device is removed. For example, the remove callback will be called on each -open descriptor for a bdev backed by a physical NVMe SSD when the NVMe SSD is -hot-unplugged. The callback can be thought of as a request to close the open -descriptor so other memory may be freed. A bdev cannot be torn down while open -descriptors exist, so it is highly recommended that a callback is provided. +When a block device is opened, a callback and context must be provided that +will be called with appropriate spdk_bdev_event_type enum as an argument when +the bdev triggers asynchronous event such as bdev removal. For example, +the callback will be called on each open descriptor for a bdev backed by +a physical NVMe SSD when the NVMe SSD is hot-unplugged. In this case +the callback can be thought of as a request to close the open descriptor so +other memory may be freed. A bdev cannot be torn down while open descriptors +exist, so it is required that a callback is provided. When a user is done with a descriptor, they may release it by calling spdk_bdev_close(). diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index 3978ea42d..cd55e63d5 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -60,6 +60,11 @@ extern "C" { */ #define SPDK_BDEV_BUF_SIZE_WITH_MD(x) (((x) / 512) * (512 + 16)) +/** Asynchronous event type */ +enum spdk_bdev_event_type { + SPDK_BDEV_EVENT_REMOVE, +}; + /** * \brief SPDK block device. * @@ -74,6 +79,16 @@ struct spdk_bdev; */ typedef void (*spdk_bdev_remove_cb_t)(void *remove_ctx); +/** + * Block device event callback. + * + * \param event Event details. + * \param bdev Block device that triggered event. + * \param event_ctx Context for the block device event. + */ +typedef void (*spdk_bdev_event_cb_t)(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, + void *event_ctx); + /** * Block device I/O * @@ -256,7 +271,7 @@ struct spdk_bdev *spdk_bdev_first_leaf(void); struct spdk_bdev *spdk_bdev_next_leaf(struct spdk_bdev *prev); /** - * Open a block device for I/O operations. + * Open a block device for I/O operations (deprecated, please use spdk_bdev_open_ext). * * \param bdev Block device to open. * \param write true is read/write access requested, false if read-only @@ -272,6 +287,23 @@ struct spdk_bdev *spdk_bdev_next_leaf(struct spdk_bdev *prev); int spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb, void *remove_ctx, struct spdk_bdev_desc **desc); +/** + * Open a block device for I/O operations. + * + * \param bdev_name Block device name to open. + * \param write true is read/write access requested, false if read-only + * \param event_cb notification callback to be called when the bdev triggers + * asynchronous event such as bdev removal. This will always be called on the + * same thread that spdk_bdev_open() was called on. In case of removal event + * the descriptor will have to be manually closed to make the bdev unregister + * proceed. + * \param event_ctx param for event_cb. + * \param desc output parameter for the descriptor when operation is successful + * \return 0 if operation is successful, suitable errno value otherwise + */ +int spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb, + void *event_ctx, struct spdk_bdev_desc **desc); + /** * Close a previously opened block device. * diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 935505c37..193b3dc5f 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -273,8 +273,14 @@ struct spdk_bdev_channel { struct spdk_bdev_desc { struct spdk_bdev *bdev; struct spdk_thread *thread; - spdk_bdev_remove_cb_t remove_cb; - void *remove_ctx; + struct { + bool open_with_ext; + union { + spdk_bdev_remove_cb_t remove_fn; + spdk_bdev_event_cb_t event_fn; + }; + void *ctx; + } callback; bool remove_scheduled; bool closed; bool write; @@ -4194,7 +4200,11 @@ _remove_notify(void *arg) if (desc->closed) { free(desc); } else { - desc->remove_cb(desc->remove_ctx); + if (desc->callback.open_with_ext) { + desc->callback.event_fn(SPDK_BDEV_EVENT_REMOVE, desc->bdev, desc->callback.ctx); + } else { + desc->callback.remove_fn(desc->callback.ctx); + } } } @@ -4210,18 +4220,16 @@ spdk_bdev_unregister_unsafe(struct spdk_bdev *bdev) /* Notify each descriptor about hotremoval */ TAILQ_FOREACH_SAFE(desc, &bdev->internal.open_descs, link, tmp) { rc = -EBUSY; - if (desc->remove_cb) { - /* - * Defer invocation of the remove_cb to a separate message that will - * run later on its thread. This ensures this context unwinds and - * we don't recursively unregister this bdev again if the remove_cb - * immediately closes its descriptor. - */ - if (!desc->remove_scheduled) { - /* Avoid scheduling removal of the same descriptor multiple times. */ - desc->remove_scheduled = true; - spdk_thread_send_msg(desc->thread, _remove_notify, desc); - } + /* + * Defer invocation of the event_cb to a separate message that will + * run later on its thread. This ensures this context unwinds and + * we don't recursively unregister this bdev again if the event_cb + * immediately closes its descriptor. + */ + if (!desc->remove_scheduled) { + /* Avoid scheduling removal of the same descriptor multiple times. */ + desc->remove_scheduled = true; + spdk_thread_send_msg(desc->thread, _remove_notify, desc); } } @@ -4353,8 +4361,11 @@ spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_ remove_cb = _spdk_bdev_dummy_event_cb; } - desc->remove_cb = remove_cb; - desc->remove_ctx = remove_ctx; + desc->callback.open_with_ext = false; + desc->callback.remove_fn = remove_cb; + desc->callback.ctx = remove_ctx; + + pthread_mutex_lock(&g_bdev_mgr.mutex); rc = _spdk_bdev_open(bdev, write, desc); if (rc != 0) { @@ -4364,6 +4375,55 @@ spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_ *_desc = desc; + pthread_mutex_unlock(&g_bdev_mgr.mutex); + + return rc; +} + +int +spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb, + void *event_ctx, struct spdk_bdev_desc **_desc) +{ + struct spdk_bdev_desc *desc; + struct spdk_bdev *bdev; + int rc; + + if (event_cb == NULL) { + SPDK_ERRLOG("Missing event callback function\n"); + return -EINVAL; + } + + pthread_mutex_lock(&g_bdev_mgr.mutex); + + bdev = spdk_bdev_get_by_name(bdev_name); + + if (bdev == NULL) { + SPDK_ERRLOG("Failed to find bdev with name: %s\n", bdev_name); + pthread_mutex_unlock(&g_bdev_mgr.mutex); + return -EINVAL; + } + + desc = calloc(1, sizeof(*desc)); + if (desc == NULL) { + SPDK_ERRLOG("Failed to allocate memory for bdev descriptor\n"); + pthread_mutex_unlock(&g_bdev_mgr.mutex); + return -ENOMEM; + } + + desc->callback.open_with_ext = true; + desc->callback.event_fn = event_cb; + desc->callback.ctx = event_ctx; + + rc = _spdk_bdev_open(bdev, write, desc); + if (rc != 0) { + free(desc); + desc = NULL; + } + + *_desc = desc; + + pthread_mutex_unlock(&g_bdev_mgr.mutex); + return rc; } diff --git a/test/unit/lib/bdev/bdev.c/bdev_ut.c b/test/unit/lib/bdev/bdev.c/bdev_ut.c index 4ad38f07f..3964c96cc 100644 --- a/test/unit/lib/bdev/bdev.c/bdev_ut.c +++ b/test/unit/lib/bdev/bdev.c/bdev_ut.c @@ -64,6 +64,8 @@ DEFINE_STUB(spdk_notify_type_register, struct spdk_notify_type *, (const char *t int g_status; int g_count; +enum spdk_bdev_event_type g_event_type1; +enum spdk_bdev_event_type g_event_type2; struct spdk_histogram_data *g_histogram; void @@ -1998,6 +2000,58 @@ bdev_open_while_hotremove(void) free_bdev(bdev); } +static void +bdev_open_cb1(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) +{ + struct spdk_bdev_desc *desc = *(struct spdk_bdev_desc **)event_ctx; + + g_event_type1 = type; + spdk_bdev_close(desc); +} + +static void +bdev_open_cb2(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) +{ + struct spdk_bdev_desc *desc = *(struct spdk_bdev_desc **)event_ctx; + + g_event_type2 = type; + spdk_bdev_close(desc); +} + +static void +bdev_open_ext(void) +{ + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc1 = NULL; + struct spdk_bdev_desc *desc2 = NULL; + int rc = 0; + + bdev = allocate_bdev("bdev"); + + rc = spdk_bdev_open_ext("bdev", true, NULL, NULL, &desc1); + CU_ASSERT_EQUAL(rc, -EINVAL); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc1, &desc1); + CU_ASSERT_EQUAL(rc, 0); + + rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb2, &desc2, &desc2); + CU_ASSERT_EQUAL(rc, 0); + + g_event_type1 = 0xFF; + g_event_type2 = 0xFF; + + /* Simulate hot-unplug by unregistering bdev */ + spdk_bdev_unregister(bdev, NULL, NULL); + poll_threads(); + + /* Check if correct events have been triggered in event callback fn */ + CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_REMOVE); + CU_ASSERT_EQUAL(g_event_type2, SPDK_BDEV_EVENT_REMOVE); + + free_bdev(bdev); + poll_threads(); +} + int main(int argc, char **argv) { @@ -2030,7 +2084,8 @@ main(int argc, char **argv) CU_add_test(suite, "bdev_io_alignment", bdev_io_alignment) == NULL || CU_add_test(suite, "bdev_histograms", bdev_histograms) == NULL || CU_add_test(suite, "bdev_write_zeroes", bdev_write_zeroes) == NULL || - CU_add_test(suite, "bdev_open_while_hotremove", bdev_open_while_hotremove) == NULL + CU_add_test(suite, "bdev_open_while_hotremove", bdev_open_while_hotremove) == NULL || + CU_add_test(suite, "bdev_open_ext", bdev_open_ext) == NULL ) { CU_cleanup_registry(); return CU_get_error();