From bcff088852faa8d1bf08fef6738c145e07bd2e15 Mon Sep 17 00:00:00 2001
From: Jim Harris
Date: Wed, 22 Sep 2021 10:19:29 -0700
Subject: [PATCH] scheduler/dynamic: don't adjust tsc too much for very busy
 cores

If a core has a very high busy percentage, we should not
assume that moving a thread will gain that thread's busy
tsc as newly idle cycles for the current core.

So if the current core's percentage is above
SCHEDULER_CORE_BUSY (95%), do not adjust the current
core's busy/idle tsc when moving a thread off of it.
If moving the thread does actually result in some newly
idle tsc, it will get adjusted next scheduling period.

Signed-off-by: Jim Harris
Change-Id: I26a0282cd8f8e821809289b80c979cf94335353d
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9581
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins
Reviewed-by: Tomasz Zawadzki
Reviewed-by: Ben Walker
---
 module/scheduler/dynamic/scheduler_dynamic.c | 19 +++++++++++++++++++
 test/unit/lib/event/reactor.c/reactor_ut.c   |  6 +-----
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/module/scheduler/dynamic/scheduler_dynamic.c b/module/scheduler/dynamic/scheduler_dynamic.c
index 863f572f1..dbd767296 100644
--- a/module/scheduler/dynamic/scheduler_dynamic.c
+++ b/module/scheduler/dynamic/scheduler_dynamic.c
@@ -53,6 +53,7 @@ static struct core_stats *g_cores;
 
 #define SCHEDULER_LOAD_LIMIT 20
 #define SCHEDULER_CORE_LIMIT 95
+#define SCHEDULER_CORE_BUSY 95
 
 static uint8_t
 _busy_pct(uint64_t busy, uint64_t idle)
@@ -98,6 +99,8 @@ _move_thread(struct spdk_scheduler_thread_info *thread_info, uint32_t dst_core)
 	struct core_stats *dst = &g_cores[dst_core];
 	struct core_stats *src = &g_cores[thread_info->lcore];
 	uint64_t busy_tsc = thread_info->current_stats.busy_tsc;
+	uint8_t busy_pct = _busy_pct(src->busy, src->idle);
+	uint64_t tsc;
 
 	if (src == dst) {
 		/* Don't modify stats if thread is already on that core. */
@@ -113,6 +116,22 @@ _move_thread(struct spdk_scheduler_thread_info *thread_info, uint32_t dst_core)
 	src->busy -= spdk_min(src->busy, busy_tsc);
 	src->idle += spdk_min(UINT64_MAX - src->idle, busy_tsc);
 
+	if (busy_pct >= SCHEDULER_CORE_BUSY &&
+	    _busy_pct(src->busy, src->idle) < SCHEDULER_CORE_LIMIT) {
+		/* This core was so busy that we cannot assume all of busy_tsc
+		 * consumed by the moved thread will now be idle_tsc - it's
+		 * very possible the remaining threads will use these cycles
+		 * as busy_tsc.
+		 *
+		 * So make sure we don't drop the updated estimate below
+		 * SCHEDULER_CORE_LIMIT, so that other cores can't
+		 * move threads to this core during this scheduling
+		 * period.
+		 */
+		tsc = src->busy + src->idle;
+		src->busy = tsc * SCHEDULER_CORE_LIMIT / 100;
+		src->idle = tsc - src->busy;
+	}
 	assert(src->thread_count > 0);
 	src->thread_count--;
diff --git a/test/unit/lib/event/reactor.c/reactor_ut.c b/test/unit/lib/event/reactor.c/reactor_ut.c
index 7bc5b286f..4089212aa 100644
--- a/test/unit/lib/event/reactor.c/reactor_ut.c
+++ b/test/unit/lib/event/reactor.c/reactor_ut.c
@@ -729,11 +729,7 @@ test_scheduler(void)
 	for (i = 0; i < 3; i++) {
 		reactor = spdk_reactor_get(i);
 		CU_ASSERT(reactor != NULL);
-		if (i == 2) {
-			CU_ASSERT(TAILQ_EMPTY(&reactor->threads));
-		} else {
-			CU_ASSERT(!TAILQ_EMPTY(&reactor->threads));
-		}
+		CU_ASSERT(!TAILQ_EMPTY(&reactor->threads));
 	}
 
 	g_reactor_state = SPDK_REACTOR_STATE_INITIALIZED;