Bdev: remove the QD poller at the time of Bdev unregister

Fix issue: #2561

The issue here is that in the bdev_set_qd_sampling_period RPC
command, the QD sampling period has been set. Then later the
related Desc is closed and in the bdev_close() function the
QD sampling period is reset to 0.

A new QD desc is added so that the QD sampling period update can
be handled properly.

Meanwhile, a new QD Poll In Progress flag is also added so as
to indicate there are ongoing events of QD sampling and the
Bdev unregister will be handled in the proper way.

Related test case and unit test also updated for this change.

Change-Id: Iac86c2c6447fe338c7480cf468897fc8f41f8741
Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Signed-off-by: GangCao <gang.cao@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13016
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
This commit is contained in:
GangCao 2022-06-11 03:05:07 -04:00 committed by Jim Harris
parent c7db171c01
commit 48ce2c978e
5 changed files with 227 additions and 19 deletions

View File

@ -478,9 +478,15 @@ struct spdk_bdev {
/** poller for tracking the queue_depth of a device, NULL if not tracking */
struct spdk_poller *qd_poller;
/** open descriptor to use qd_poller safely */
struct spdk_bdev_desc *qd_desc;
/** period at which we poll for queue depth information */
uint64_t period;
/** new period to be used to poll for queue depth information */
uint64_t new_period;
/** used to aggregate queue depth while iterating across the bdev's open channels */
uint64_t temporary_queue_depth;
@ -496,6 +502,9 @@ struct spdk_bdev {
/** accumulated I/O statistics for previously deleted channels of this bdev */
struct spdk_bdev_io_stat stat;
/** true if tracking the queue_depth of a device is in progress */
bool qd_poll_in_progress;
/** histogram enabled on this bdev */
bool histogram_enabled;
bool histogram_in_progress;

View File

@ -6,7 +6,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
SO_VER := 9
SO_VER := 10
SO_MINOR := 0
ifeq ($(CONFIG_VTUNE),y)

View File

@ -3925,6 +3925,8 @@ spdk_bdev_get_io_time(const struct spdk_bdev *bdev)
return bdev->internal.io_time;
}
static void bdev_update_qd_sampling_period(void *ctx);
static void
_calculate_measured_qd_cpl(struct spdk_io_channel_iter *i, int status)
{
@ -3936,6 +3938,10 @@ _calculate_measured_qd_cpl(struct spdk_io_channel_iter *i, int status)
bdev->internal.io_time += bdev->internal.period;
bdev->internal.weighted_io_time += bdev->internal.period * bdev->internal.measured_queue_depth;
}
bdev->internal.qd_poll_in_progress = false;
bdev_update_qd_sampling_period(bdev);
}
static void
@ -3953,26 +3959,75 @@ static int
/* Periodic poller entry point for queue-depth sampling. (The "static int"
 * return type appears on the preceding hunk-header line in this diff view.)
 * Kicks off one aggregation pass over all of the bdev's open channels;
 * _calculate_measured_qd_cpl() finishes the pass. */
bdev_calculate_measured_queue_depth(void *ctx)
{
struct spdk_bdev *bdev = ctx;
/* Flag that a sampling pass is in flight so period updates (and, per the
 * commit message, bdev unregister) are deferred until the completion
 * callback clears it. */
bdev->internal.qd_poll_in_progress = true;
/* Reset the running total before re-aggregating across channels. */
bdev->internal.temporary_queue_depth = 0;
spdk_for_each_channel(__bdev_to_io_dev(bdev), _calculate_measured_qd, bdev,
_calculate_measured_qd_cpl);
return SPDK_POLLER_BUSY;
}
/* Apply a pending change of the QD sampling period (internal.new_period)
 * to the live poller. Runs on the thread owning internal.qd_desc; it is
 * invoked via spdk_thread_send_msg() from spdk_bdev_set_qd_sampling_period()
 * and re-invoked from _calculate_measured_qd_cpl() once an in-flight
 * sampling pass completes. */
static void
bdev_update_qd_sampling_period(void *ctx)
{
struct spdk_bdev *bdev = ctx;
/* Nothing to do if the requested period is already in effect. */
if (bdev->internal.period == bdev->internal.new_period) {
return;
}
/* Defer while a sampling pass is running; the completion callback will
 * call us again. */
if (bdev->internal.qd_poll_in_progress) {
return;
}
bdev->internal.period = bdev->internal.new_period;
spdk_poller_unregister(&bdev->internal.qd_poller);
if (bdev->internal.period != 0) {
/* Re-register the poller at the new rate. */
bdev->internal.qd_poller = SPDK_POLLER_REGISTER(bdev_calculate_measured_queue_depth,
bdev, bdev->internal.period);
} else {
/* Sampling disabled: also drop the internal descriptor that kept the
 * bdev open for the poller. */
spdk_bdev_close(bdev->internal.qd_desc);
bdev->internal.qd_desc = NULL;
}
}
/* Minimal event callback for the internal QD descriptor opened in
 * spdk_bdev_set_qd_sampling_period(); no event is expected on it, so any
 * arrival is merely logged. */
static void
_tmp_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
SPDK_NOTICELOG("Unexpected event type: %d\n", type);
}
/* Set (or change) the period, in microseconds, at which the bdev's queue
 * depth is sampled; period == 0 disables sampling.
 *
 * The stripped diff rendering had concatenated the removed pre-change lines
 * (conditional unregister + conditional register) with the new unconditional
 * SPDK_POLLER_REGISTER call, so the block as shown registered the poller
 * twice (overwriting/leaking the first handle) and registered even when
 * period == 0. Reconstructed here as the post-change implementation.
 */
void
spdk_bdev_set_qd_sampling_period(struct spdk_bdev *bdev, uint64_t period)
{
	int rc;

	/* Requested period already pending or active: nothing to do. */
	if (bdev->internal.new_period == period) {
		return;
	}

	bdev->internal.new_period = period;

	if (bdev->internal.qd_desc != NULL) {
		assert(bdev->internal.period != 0);

		/* Sampling is already active: hand the update to the thread
		 * that owns the QD descriptor; bdev_update_qd_sampling_period()
		 * applies new_period there (deferring if a pass is in flight). */
		spdk_thread_send_msg(bdev->internal.qd_desc->thread,
				     bdev_update_qd_sampling_period, bdev);
		return;
	}

	/* First activation: open an internal descriptor so the poller can use
	 * the bdev safely, then start the poller at the requested rate. */
	assert(bdev->internal.period == 0);

	rc = spdk_bdev_open_ext(spdk_bdev_get_name(bdev), false, _tmp_bdev_event_cb,
				NULL, &bdev->internal.qd_desc);
	if (rc != 0) {
		return;
	}

	bdev->internal.period = period;
	bdev->internal.qd_poller = SPDK_POLLER_REGISTER(bdev_calculate_measured_queue_depth,
							bdev, period);
}
static void
@ -6071,6 +6126,9 @@ bdev_register(struct spdk_bdev *bdev)
}
bdev->internal.reset_in_progress = NULL;
bdev->internal.qd_poll_in_progress = false;
bdev->internal.period = 0;
bdev->internal.new_period = 0;
spdk_io_device_register(__bdev_to_io_dev(bdev),
bdev_channel_create, bdev_channel_destroy,
@ -6280,18 +6338,14 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void
pthread_mutex_unlock(&bdev->internal.mutex);
pthread_mutex_unlock(&g_bdev_mgr.mutex);
spdk_bdev_set_qd_sampling_period(bdev, 0);
spdk_for_each_channel(__bdev_to_io_dev(bdev),
bdev_unregister_abort_channel,
bdev,
bdev_unregister);
}
static void
_tmp_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
SPDK_NOTICELOG("Unexpected event type: %d\n", type);
}
int
spdk_bdev_unregister_by_name(const char *bdev_name, struct spdk_bdev_module *module,
spdk_bdev_unregister_cb cb_fn, void *cb_arg)
@ -6505,8 +6559,6 @@ bdev_close(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc)
}
}
spdk_bdev_set_qd_sampling_period(bdev, 0);
if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->internal.open_descs)) {
rc = bdev_unregister_unsafe(bdev);
pthread_mutex_unlock(&bdev->internal.mutex);

View File

@ -353,6 +353,46 @@ function qos_test_suite() {
trap - SIGINT SIGTERM EXIT
}
# Verify that bdev_set_qd_sampling_period takes effect on the given bdev:
# set a sampling period via RPC and confirm bdev_get_iostat reports the
# same queue_depth_polling_period. On mismatch, tear down and fail.
# Fixes: "Qeueue" typo in the error message, and unquoted
# $qd_sampling_period in the [ ] tests (a null/empty jq result would
# otherwise produce a shell syntax error instead of a clean failure).
function qd_sampling_function_test() {
	local bdev_name=$1
	local sampling_period=10
	local iostats

	$rpc_py bdev_set_qd_sampling_period $bdev_name $sampling_period

	iostats=$($rpc_py bdev_get_iostat -b $bdev_name)

	qd_sampling_period=$(jq -r '.bdevs[0].queue_depth_polling_period' <<< "$iostats")

	if [ "$qd_sampling_period" == null ] || [ "$qd_sampling_period" -ne "$sampling_period" ]; then
		echo "Queue depth polling period is not right"
		$rpc_py bdev_malloc_delete $QD_DEV
		killprocess $QD_PID
		exit 1
	fi
}
# End-to-end QD sampling test: start bdevperf, create a malloc bdev, run
# the sampling-period check while random-read I/O is in flight, then clean
# everything up.
function qd_sampling_test_suite() {
QD_DEV="Malloc_QD"
# Start bdevperf in wait-for-RPC mode (-z) so the bdev can be created first.
"$testdir/bdevperf/bdevperf" -z -m 0x3 -q 256 -o 4096 -w randread -t 5 -C "$env_ctx" &
QD_PID=$!
echo "Process bdev QD sampling period testing pid: $QD_PID"
# Ensure bdevperf is killed even if the test is interrupted.
trap 'cleanup; killprocess $QD_PID; exit 1' SIGINT SIGTERM EXIT
waitforlisten $QD_PID
$rpc_py bdev_malloc_create -b $QD_DEV 128 512
waitforbdev $QD_DEV
# Kick off the I/O workload in the background, give it time to start, then
# check the sampling period while I/O is active.
$rootdir/test/bdev/bdevperf/bdevperf.py perform_tests &
sleep 2
qd_sampling_function_test $QD_DEV
$rpc_py bdev_malloc_delete $QD_DEV
killprocess $QD_PID
trap - SIGINT SIGTERM EXIT
}
# Initial bdev creation and configuration
#-----------------------------------------------------
QOS_DEV_1="Malloc_0"
@ -457,6 +497,7 @@ run_test "bdev_write_zeroes" $testdir/bdevperf/bdevperf --json "$conf_file" -q 1
if [[ $test_type == bdev ]]; then
run_test "bdev_qos" qos_test_suite "$env_ctx"
run_test "bdev_qd_sampling" qd_sampling_test_suite "$env_ctx"
fi
# Temporarily disabled - infinite loop

View File

@ -4095,6 +4095,111 @@ bdev_set_io_timeout(void)
poll_threads();
}
/* Unit test for the queue-depth sampling machinery:
 * part 1 checks io_submitted bookkeeping, part 2 exercises the qd poller /
 * qd_desc lifecycle across period changes, part 3 checks that submitted
 * I/O and reset interact cleanly with an active sampling period. */
static void
bdev_set_qd_sampling(void)
{
struct spdk_bdev *bdev;
struct spdk_bdev_desc *desc = NULL;
struct spdk_io_channel *io_ch = NULL;
struct spdk_bdev_channel *bdev_ch = NULL;
struct timeout_io_cb_arg cb_arg;
spdk_bdev_initialize(bdev_init_cb, NULL);
bdev = allocate_bdev("bdev");
CU_ASSERT(spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc) == 0);
SPDK_CU_ASSERT_FATAL(desc != NULL);
CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
io_ch = spdk_bdev_get_io_channel(desc);
CU_ASSERT(io_ch != NULL);
bdev_ch = spdk_io_channel_get_ctx(io_ch);
CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));
/* Part 1: check that bdev_ch->io_submitted links exactly the
 * user-submitted I/Os, and shrinks as they complete. */
CU_ASSERT(spdk_bdev_read(desc, io_ch, (void *)0x1000, 0, 4096, io_done, NULL) == 0);
CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
CU_ASSERT(spdk_bdev_write(desc, io_ch, (void *)0x2000, 0, 4096, io_done, NULL) == 0);
CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 2);
stub_complete_io(1);
CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
stub_complete_io(1);
CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 0);
/* Part 2: exercise the qd poller registration lifecycle. */
/* 1st: enable sampling; period takes effect immediately since no
 * qd_desc existed yet, and the poller appears after polling. */
spdk_bdev_set_qd_sampling_period(bdev, 10);
CU_ASSERT(bdev->internal.new_period == 10);
CU_ASSERT(bdev->internal.period == 10);
CU_ASSERT(bdev->internal.qd_desc != NULL);
poll_threads();
CU_ASSERT(bdev->internal.qd_poller != NULL);
/* 2nd: change the period; the update is deferred via thread message,
 * so period lags new_period until the threads are polled. */
spdk_bdev_set_qd_sampling_period(bdev, 20);
CU_ASSERT(bdev->internal.new_period == 20);
CU_ASSERT(bdev->internal.period == 10);
CU_ASSERT(bdev->internal.qd_desc != NULL);
poll_threads();
CU_ASSERT(bdev->internal.qd_poller != NULL);
CU_ASSERT(bdev->internal.period == bdev->internal.new_period);
/* 3rd: change the period while a sampling pass is in flight and verify
 * qd_poll_in_progress defers the update until the pass completes. */
spdk_delay_us(20);
poll_thread_times(0, 1);
CU_ASSERT(bdev->internal.qd_poll_in_progress == true);
spdk_bdev_set_qd_sampling_period(bdev, 30);
CU_ASSERT(bdev->internal.new_period == 30);
CU_ASSERT(bdev->internal.period == 20);
poll_threads();
CU_ASSERT(bdev->internal.qd_poll_in_progress == false);
CU_ASSERT(bdev->internal.period == bdev->internal.new_period);
/* 4th: disable sampling; poller and qd_desc are both released. */
spdk_bdev_set_qd_sampling_period(bdev, 0);
CU_ASSERT(bdev->internal.new_period == 0);
CU_ASSERT(bdev->internal.period == 30);
poll_threads();
CU_ASSERT(bdev->internal.qd_poller == NULL);
CU_ASSERT(bdev->internal.period == bdev->internal.new_period);
CU_ASSERT(bdev->internal.qd_desc == NULL);
/* Part 3: verify submitted I/O and reset work properly while qd
 * sampling is active. */
memset(&cb_arg, 0, sizeof(cb_arg));
spdk_bdev_set_qd_sampling_period(bdev, 1);
poll_threads();
CU_ASSERT(spdk_bdev_write(desc, io_ch, (void *)0x2000, 0, 4096, io_done, NULL) == 0);
CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
/* Also include a reset I/O. */
memset(&cb_arg, 0, sizeof(cb_arg));
CU_ASSERT(spdk_bdev_reset(desc, io_ch, io_done, NULL) == 0);
poll_threads();
/* Close the desc while I/O is still outstanding. */
spdk_put_io_channel(io_ch);
spdk_bdev_close(desc);
/* Complete the submitted write and the reset, then tear down. */
stub_complete_io(2);
poll_threads();
free_bdev(bdev);
spdk_bdev_finish(bdev_fini_cb, NULL);
poll_threads();
}
static void
lba_range_overlap(void)
{
@ -5371,6 +5476,7 @@ main(int argc, char **argv)
CU_ADD_TEST(suite, bdev_open_ext);
CU_ADD_TEST(suite, bdev_open_ext_unregister);
CU_ADD_TEST(suite, bdev_set_io_timeout);
CU_ADD_TEST(suite, bdev_set_qd_sampling);
CU_ADD_TEST(suite, lba_range_overlap);
CU_ADD_TEST(suite, lock_lba_range_check_ranges);
CU_ADD_TEST(suite, lock_lba_range_with_io_outstanding);