bdev/raid: Fix race issue among multiple threads to free RAID bdev
The following issue was observed.
The first thread returned the last IO channel and the second thread
then removed the first base device, but raid_bdev_cleanup() was
called before raid_bdev_destroy_cb() was called.
raid_bdev_destroy_cb() was accessed to the raid bdev already freed
by raid_bdev_cleanup() and caused segmentation fault.
The call sequence was as follows:
The first thread:
spdk_put_io_channel() -> ch->destroy_cb -> raid_bdev_destroy_cb
-> access raid bdev
The second thread:
raid_bdev_remove_base_devices() -> raid_bdev_deconfigure() ->
spdk_bdev_unregister() -> spdk_io_device_unregister() ->
spdk_bdev_destroy_cb() -> raid_bdev_destruct() -> raid_bdev_cleanup()
-> free raid bdev
The fix is to hold number of created channels in struct
raid_bdev_io_channel and use it in raid_bdev_destroy_cb().
Bdev layer, IO device/channel layer, and NVMe-oF layer already
process this case correctly.
Fixes #884.
Reported-by: yidong0635 <dongx.yi@intel.com>
Signed-off-by: yidong0635 <dongx.yi@intel.com>
Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/463249 (master)
(cherry picked from commit 284aca9e36
)
Change-Id: Ie9d61bdddca479ce7f491ff9a08db45e71f16a8d
Signed-off-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/467130
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
This commit is contained in:
parent
23cd855116
commit
077ae9152d
@ -96,13 +96,15 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf)
|
||||
assert(raid_bdev != NULL);
|
||||
assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE);
|
||||
|
||||
raid_ch->base_channel = calloc(raid_bdev->num_base_bdevs,
|
||||
raid_ch->num_channels = raid_bdev->num_base_bdevs;
|
||||
|
||||
raid_ch->base_channel = calloc(raid_ch->num_channels,
|
||||
sizeof(struct spdk_io_channel *));
|
||||
if (!raid_ch->base_channel) {
|
||||
SPDK_ERRLOG("Unable to allocate base bdevs io channel\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
for (uint32_t i = 0; i < raid_bdev->num_base_bdevs; i++) {
|
||||
for (uint8_t i = 0; i < raid_ch->num_channels; i++) {
|
||||
/*
|
||||
* Get the spdk_io_channel for all the base bdevs. This is used during
|
||||
* split logic to send the respective child bdev ios to respective base
|
||||
@ -111,7 +113,7 @@ raid_bdev_create_cb(void *io_device, void *ctx_buf)
|
||||
raid_ch->base_channel[i] = spdk_bdev_get_io_channel(
|
||||
raid_bdev->base_bdev_info[i].desc);
|
||||
if (!raid_ch->base_channel[i]) {
|
||||
for (uint32_t j = 0; j < i; j++) {
|
||||
for (uint8_t j = 0; j < i; j++) {
|
||||
spdk_put_io_channel(raid_ch->base_channel[j]);
|
||||
}
|
||||
free(raid_ch->base_channel);
|
||||
@ -138,14 +140,12 @@ static void
|
||||
raid_bdev_destroy_cb(void *io_device, void *ctx_buf)
|
||||
{
|
||||
struct raid_bdev_io_channel *raid_ch = ctx_buf;
|
||||
struct raid_bdev *raid_bdev = io_device;
|
||||
|
||||
SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destroy_cb\n");
|
||||
|
||||
assert(raid_bdev != NULL);
|
||||
assert(raid_ch != NULL);
|
||||
assert(raid_ch->base_channel);
|
||||
for (uint32_t i = 0; i < raid_bdev->num_base_bdevs; i++) {
|
||||
for (uint8_t i = 0; i < raid_ch->num_channels; i++) {
|
||||
/* Free base bdev channels */
|
||||
assert(raid_ch->base_channel[i] != NULL);
|
||||
spdk_put_io_channel(raid_ch->base_channel[i]);
|
||||
|
@ -203,6 +203,9 @@ struct raid_config {
|
||||
struct raid_bdev_io_channel {
|
||||
/* Array of IO channels of base bdevs */
|
||||
struct spdk_io_channel **base_channel;
|
||||
|
||||
/* Number of IO channels */
|
||||
uint8_t num_channels;
|
||||
};
|
||||
|
||||
/* TAIL heads for various raid bdev lists */
|
||||
|
Loading…
Reference in New Issue
Block a user