bdev: Add spdk_bdev_open_ext function

This patch adds a new interface for opening a bdev and
implements a new-style remove event. With these changes
the user can be notified about different types of events
that occur in regard to a bdev. The spdk_bdev_open_ext
function uses the bdev name as an argument instead of the bdev
structure to remove a race condition where the user gets
the bdev structure and the bdev is removed after getting
that structure and before the open function is called.

spdk_bdev_open is now deprecated.

Signed-off-by: Maciej Szwed <maciej.szwed@intel.com>
Change-Id: I44ebeb988bc6a2f441fc6a0c38a30668aad999ad
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/455647
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Broadcom SPDK FC-NVMe CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
Maciej Szwed 2019-05-24 11:06:06 +02:00 committed by Jim Harris
parent eab7360bcb
commit 79ed1ba18d
5 changed files with 183 additions and 27 deletions

View File

@ -16,6 +16,14 @@ a null name parameter and will return the only available target.
The majority of the NVMe-oF RPCs now accept an optional tgt_name parameter. This will
allow those RPCs to work with applications that create more than one target.
### bdev
A new spdk_bdev_open_ext function has been added and the spdk_bdev_open function has been deprecated.
The new open function introduces a requirement to provide a callback function that will be called on
asynchronous events such as bdev removal. The spdk_bdev_open_ext function takes the bdev name as
an argument instead of the bdev structure to avoid a race condition that can happen when the bdev
is being removed between a call to get its structure based on a name and actually opening it.
### nvme
Added `no_shn_notification` to NVMe controller initialization options, users can enable

View File

@ -72,7 +72,7 @@ name to look up the block device.
## Preparing To Use A Block Device
In order to send I/O requests to a block device, it must first be opened by
calling spdk_bdev_open_ext(). This will return a descriptor. Multiple users may have
a bdev open at the same time, and coordination of reads and writes between
users must be handled by some higher level mechanism outside of the bdev
layer. Opening a bdev with write permission may fail if a virtual bdev module
@ -81,13 +81,14 @@ logical volume management and forward their I/O to lower level bdevs, so they
mark these lower level bdevs as claimed to prevent outside users from issuing
writes.
When a block device is opened, a callback and context must be provided that
will be called with the appropriate spdk_bdev_event_type enum as an argument when
the bdev triggers an asynchronous event such as bdev removal. For example,
the callback will be called on each open descriptor for a bdev backed by
a physical NVMe SSD when the NVMe SSD is hot-unplugged. In this case
the callback can be thought of as a request to close the open descriptor so
other memory may be freed. A bdev cannot be torn down while open descriptors
exist, so it is required that a callback is provided.
When a user is done with a descriptor, they may release it by calling
spdk_bdev_close().

View File

@ -60,6 +60,11 @@ extern "C" {
*/ */
#define SPDK_BDEV_BUF_SIZE_WITH_MD(x) (((x) / 512) * (512 + 16)) #define SPDK_BDEV_BUF_SIZE_WITH_MD(x) (((x) / 512) * (512 + 16))
/** Asynchronous event type */
enum spdk_bdev_event_type {
SPDK_BDEV_EVENT_REMOVE,
};
/** /**
* \brief SPDK block device. * \brief SPDK block device.
* *
@ -74,6 +79,16 @@ struct spdk_bdev;
*/ */
typedef void (*spdk_bdev_remove_cb_t)(void *remove_ctx); typedef void (*spdk_bdev_remove_cb_t)(void *remove_ctx);
/**
* Block device event callback.
*
* \param event Event details.
* \param bdev Block device that triggered event.
* \param event_ctx Context for the block device event.
*/
typedef void (*spdk_bdev_event_cb_t)(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
void *event_ctx);
/** /**
* Block device I/O * Block device I/O
* *
@ -256,7 +271,7 @@ struct spdk_bdev *spdk_bdev_first_leaf(void);
struct spdk_bdev *spdk_bdev_next_leaf(struct spdk_bdev *prev); struct spdk_bdev *spdk_bdev_next_leaf(struct spdk_bdev *prev);
/** /**
* Open a block device for I/O operations. * Open a block device for I/O operations (deprecated, please use spdk_bdev_open_ext).
* *
* \param bdev Block device to open. * \param bdev Block device to open.
* \param write true is read/write access requested, false if read-only * \param write true is read/write access requested, false if read-only
@ -272,6 +287,23 @@ struct spdk_bdev *spdk_bdev_next_leaf(struct spdk_bdev *prev);
int spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb, int spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb,
void *remove_ctx, struct spdk_bdev_desc **desc); void *remove_ctx, struct spdk_bdev_desc **desc);
/**
* Open a block device for I/O operations.
*
* \param bdev_name Block device name to open.
* \param write true is read/write access requested, false if read-only
* \param event_cb notification callback to be called when the bdev triggers
* asynchronous event such as bdev removal. This will always be called on the
* same thread that spdk_bdev_open() was called on. In case of removal event
* the descriptor will have to be manually closed to make the bdev unregister
* proceed.
* \param event_ctx param for event_cb.
* \param desc output parameter for the descriptor when operation is successful
* \return 0 if operation is successful, suitable errno value otherwise
*/
int spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
void *event_ctx, struct spdk_bdev_desc **desc);
/** /**
* Close a previously opened block device. * Close a previously opened block device.
* *

View File

@ -273,8 +273,14 @@ struct spdk_bdev_channel {
struct spdk_bdev_desc { struct spdk_bdev_desc {
struct spdk_bdev *bdev; struct spdk_bdev *bdev;
struct spdk_thread *thread; struct spdk_thread *thread;
spdk_bdev_remove_cb_t remove_cb; struct {
void *remove_ctx; bool open_with_ext;
union {
spdk_bdev_remove_cb_t remove_fn;
spdk_bdev_event_cb_t event_fn;
};
void *ctx;
} callback;
bool remove_scheduled; bool remove_scheduled;
bool closed; bool closed;
bool write; bool write;
@ -4194,7 +4200,11 @@ _remove_notify(void *arg)
if (desc->closed) { if (desc->closed) {
free(desc); free(desc);
} else { } else {
desc->remove_cb(desc->remove_ctx); if (desc->callback.open_with_ext) {
desc->callback.event_fn(SPDK_BDEV_EVENT_REMOVE, desc->bdev, desc->callback.ctx);
} else {
desc->callback.remove_fn(desc->callback.ctx);
}
} }
} }
@ -4210,18 +4220,16 @@ spdk_bdev_unregister_unsafe(struct spdk_bdev *bdev)
/* Notify each descriptor about hotremoval */ /* Notify each descriptor about hotremoval */
TAILQ_FOREACH_SAFE(desc, &bdev->internal.open_descs, link, tmp) { TAILQ_FOREACH_SAFE(desc, &bdev->internal.open_descs, link, tmp) {
rc = -EBUSY; rc = -EBUSY;
if (desc->remove_cb) { /*
/* * Defer invocation of the event_cb to a separate message that will
* Defer invocation of the remove_cb to a separate message that will * run later on its thread. This ensures this context unwinds and
* run later on its thread. This ensures this context unwinds and * we don't recursively unregister this bdev again if the event_cb
* we don't recursively unregister this bdev again if the remove_cb * immediately closes its descriptor.
* immediately closes its descriptor. */
*/ if (!desc->remove_scheduled) {
if (!desc->remove_scheduled) { /* Avoid scheduling removal of the same descriptor multiple times. */
/* Avoid scheduling removal of the same descriptor multiple times. */ desc->remove_scheduled = true;
desc->remove_scheduled = true; spdk_thread_send_msg(desc->thread, _remove_notify, desc);
spdk_thread_send_msg(desc->thread, _remove_notify, desc);
}
} }
} }
@ -4353,8 +4361,11 @@ spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_
remove_cb = _spdk_bdev_dummy_event_cb; remove_cb = _spdk_bdev_dummy_event_cb;
} }
desc->remove_cb = remove_cb; desc->callback.open_with_ext = false;
desc->remove_ctx = remove_ctx; desc->callback.remove_fn = remove_cb;
desc->callback.ctx = remove_ctx;
pthread_mutex_lock(&g_bdev_mgr.mutex);
rc = _spdk_bdev_open(bdev, write, desc); rc = _spdk_bdev_open(bdev, write, desc);
if (rc != 0) { if (rc != 0) {
@ -4364,6 +4375,55 @@ spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_
*_desc = desc; *_desc = desc;
pthread_mutex_unlock(&g_bdev_mgr.mutex);
return rc;
}
int
spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
void *event_ctx, struct spdk_bdev_desc **_desc)
{
struct spdk_bdev_desc *desc;
struct spdk_bdev *bdev;
int rc;
if (event_cb == NULL) {
SPDK_ERRLOG("Missing event callback function\n");
return -EINVAL;
}
pthread_mutex_lock(&g_bdev_mgr.mutex);
bdev = spdk_bdev_get_by_name(bdev_name);
if (bdev == NULL) {
SPDK_ERRLOG("Failed to find bdev with name: %s\n", bdev_name);
pthread_mutex_unlock(&g_bdev_mgr.mutex);
return -EINVAL;
}
desc = calloc(1, sizeof(*desc));
if (desc == NULL) {
SPDK_ERRLOG("Failed to allocate memory for bdev descriptor\n");
pthread_mutex_unlock(&g_bdev_mgr.mutex);
return -ENOMEM;
}
desc->callback.open_with_ext = true;
desc->callback.event_fn = event_cb;
desc->callback.ctx = event_ctx;
rc = _spdk_bdev_open(bdev, write, desc);
if (rc != 0) {
free(desc);
desc = NULL;
}
*_desc = desc;
pthread_mutex_unlock(&g_bdev_mgr.mutex);
return rc; return rc;
} }

View File

@ -64,6 +64,8 @@ DEFINE_STUB(spdk_notify_type_register, struct spdk_notify_type *, (const char *t
int g_status; int g_status;
int g_count; int g_count;
enum spdk_bdev_event_type g_event_type1;
enum spdk_bdev_event_type g_event_type2;
struct spdk_histogram_data *g_histogram; struct spdk_histogram_data *g_histogram;
void void
@ -1998,6 +2000,58 @@ bdev_open_while_hotremove(void)
free_bdev(bdev); free_bdev(bdev);
} }
static void
bdev_open_cb1(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
struct spdk_bdev_desc *desc = *(struct spdk_bdev_desc **)event_ctx;
g_event_type1 = type;
spdk_bdev_close(desc);
}
static void
bdev_open_cb2(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
struct spdk_bdev_desc *desc = *(struct spdk_bdev_desc **)event_ctx;
g_event_type2 = type;
spdk_bdev_close(desc);
}
static void
bdev_open_ext(void)
{
struct spdk_bdev *bdev;
struct spdk_bdev_desc *desc1 = NULL;
struct spdk_bdev_desc *desc2 = NULL;
int rc = 0;
bdev = allocate_bdev("bdev");
rc = spdk_bdev_open_ext("bdev", true, NULL, NULL, &desc1);
CU_ASSERT_EQUAL(rc, -EINVAL);
rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc1, &desc1);
CU_ASSERT_EQUAL(rc, 0);
rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb2, &desc2, &desc2);
CU_ASSERT_EQUAL(rc, 0);
g_event_type1 = 0xFF;
g_event_type2 = 0xFF;
/* Simulate hot-unplug by unregistering bdev */
spdk_bdev_unregister(bdev, NULL, NULL);
poll_threads();
/* Check if correct events have been triggered in event callback fn */
CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_REMOVE);
CU_ASSERT_EQUAL(g_event_type2, SPDK_BDEV_EVENT_REMOVE);
free_bdev(bdev);
poll_threads();
}
int int
main(int argc, char **argv) main(int argc, char **argv)
{ {
@ -2030,7 +2084,8 @@ main(int argc, char **argv)
CU_add_test(suite, "bdev_io_alignment", bdev_io_alignment) == NULL || CU_add_test(suite, "bdev_io_alignment", bdev_io_alignment) == NULL ||
CU_add_test(suite, "bdev_histograms", bdev_histograms) == NULL || CU_add_test(suite, "bdev_histograms", bdev_histograms) == NULL ||
CU_add_test(suite, "bdev_write_zeroes", bdev_write_zeroes) == NULL || CU_add_test(suite, "bdev_write_zeroes", bdev_write_zeroes) == NULL ||
CU_add_test(suite, "bdev_open_while_hotremove", bdev_open_while_hotremove) == NULL CU_add_test(suite, "bdev_open_while_hotremove", bdev_open_while_hotremove) == NULL ||
CU_add_test(suite, "bdev_open_ext", bdev_open_ext) == NULL
) { ) {
CU_cleanup_registry(); CU_cleanup_registry();
return CU_get_error(); return CU_get_error();