From ce23de932d59cefd0b36a920cc93d83d370cc97f Mon Sep 17 00:00:00 2001
From: Maciej Szwed
Date: Thu, 17 Dec 2020 10:10:53 +0100
Subject: [PATCH] scheduler: Change main core frequency dynamically

Use DPDK based governor to change first core frequency
in dynamic scheduler. Core frequencies are adjusted
based on relative usages.

Signed-off-by: Maciej Szwed
Change-Id: Ibeb9ca59fd67df27cfb0bfe752e66e5eef41b126
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5438
Tested-by: SPDK CI Jenkins
Reviewed-by: Jim Harris
Reviewed-by: Tomasz Zawadzki
Reviewed-by: Paul Luse
---
Reviewer notes (text between "---" and the diffstat is ignored by git-am):
- Line structure was restored from a whitespace-collapsed copy of this
  patch. Indentation and blank context lines are reconstructed, so use
  `git apply --ignore-whitespace` if the context does not match exactly.
- deinit(): the core id is a uint32_t, so it is now logged with %u
  (was %d), matching the other SPDK_ERRLOG calls added by this patch.

 lib/event/scheduler_dynamic.c              | 147 ++++++++++---
 test/blobstore/btest.out.ignore            |   1 +
 test/unit/lib/event/reactor.c/reactor_ut.c | 231 +++++++++++++++++++++
 3 files changed, 349 insertions(+), 30 deletions(-)

diff --git a/lib/event/scheduler_dynamic.c b/lib/event/scheduler_dynamic.c
index 7f7fac613..ffc9dc963 100644
--- a/lib/event/scheduler_dynamic.c
+++ b/lib/event/scheduler_dynamic.c
@@ -42,20 +42,17 @@
 
 static uint32_t g_next_lcore = SPDK_ENV_LCORE_ID_ANY;
 static uint32_t g_main_lcore;
+static bool g_core_mngmnt_available;
+uint64_t g_last_main_core_busy, g_last_main_core_idle;
 
 #define SCHEDULER_THREAD_BUSY 100
+#define SCHEDULER_LOAD_LIMIT 50
 
 static uint8_t
 _get_thread_load(struct spdk_lw_thread *lw_thread)
 {
 	uint64_t busy, idle;
 
-	if (lw_thread->last_stats.busy_tsc == 0 && lw_thread->last_stats.idle_tsc == 0) {
-		lw_thread->last_stats.busy_tsc = lw_thread->snapshot_stats.busy_tsc;
-		lw_thread->last_stats.idle_tsc = lw_thread->snapshot_stats.idle_tsc;
-		return SCHEDULER_THREAD_BUSY;
-	}
-
 	busy = lw_thread->snapshot_stats.busy_tsc - lw_thread->last_stats.busy_tsc;
 	idle = lw_thread->snapshot_stats.idle_tsc - lw_thread->last_stats.idle_tsc;
 
@@ -69,11 +66,45 @@ _get_thread_load(struct spdk_lw_thread *lw_thread)
 static int
 init(struct spdk_governor *governor)
 {
+	int rc;
+
 	g_main_lcore = spdk_env_get_current_core();
 
+	rc = _spdk_governor_set("dpdk_governor");
+	g_core_mngmnt_available = !rc;
+
+	g_last_main_core_busy = 0;
+	g_last_main_core_idle = 0;
+
 	return 0;
 }
 
+static int
+deinit(struct spdk_governor *governor)
+{
+	uint32_t i;
+	int rc = 0;
+
+	if (!g_core_mngmnt_available) {
+		return 0;
+	}
+
+	if (governor->deinit_core) {
+		SPDK_ENV_FOREACH_CORE(i) {
+			rc = governor->deinit_core(i);
+			if (rc != 0) {
+				SPDK_ERRLOG("Failed to deinitialize governor for core %u\n", i);
+			}
+		}
+	}
+
+	if (governor->deinit) {
+		rc = governor->deinit();
+	}
+
+	return rc;
+}
+
 static void
 balance(struct spdk_scheduler_core_info *cores_info, int cores_count,
 	struct spdk_governor *governor)
@@ -82,11 +113,21 @@ balance(struct spdk_scheduler_core_info *cores_info, int cores_count,
 	struct spdk_thread *thread;
 	struct spdk_scheduler_core_info *core;
 	struct spdk_cpuset *cpumask;
+	uint64_t main_core_busy;
+	uint64_t main_core_idle;
+	uint64_t thread_busy;
 	uint32_t target_lcore;
 	uint32_t i, j, k;
+	int rc;
+	uint8_t load;
+	bool busy_threads_present = false;
 
-	/* Distribute active threads across all cores except first one
-	 * and move idle threads to first core */
+	main_core_busy = cores_info[g_main_lcore].core_busy_tsc - g_last_main_core_busy;
+	main_core_idle = cores_info[g_main_lcore].core_idle_tsc - g_last_main_core_idle;
+	g_last_main_core_busy = cores_info[g_main_lcore].core_busy_tsc;
+	g_last_main_core_idle = cores_info[g_main_lcore].core_idle_tsc;
+
+	/* Distribute active threads across all cores and move idle threads to main core */
 	SPDK_ENV_FOREACH_CORE(i) {
 		core = &cores_info[i];
 		for (j = 0; j < core->threads_count; j++) {
@@ -95,39 +136,85 @@ balance(struct spdk_scheduler_core_info *cores_info, int cores_count,
 			thread = spdk_thread_get_from_ctx(lw_thread);
 			cpumask = spdk_thread_get_cpumask(thread);
 
-			if (_get_thread_load(lw_thread) < 50) {
-				/* Continue searching for active threads */
+			if (lw_thread->last_stats.busy_tsc + lw_thread->last_stats.idle_tsc == 0) {
+				lw_thread->last_stats.busy_tsc = lw_thread->snapshot_stats.busy_tsc;
+				lw_thread->last_stats.idle_tsc = lw_thread->snapshot_stats.idle_tsc;
+
+				if (i != g_main_lcore) {
+					busy_threads_present = true;
+				}
+
+				continue;
+			}
+
+			thread_busy = lw_thread->snapshot_stats.busy_tsc - lw_thread->last_stats.busy_tsc;
+
+			load = _get_thread_load(lw_thread);
+
+			if (i == g_main_lcore && load >= SCHEDULER_LOAD_LIMIT) {
+				/* This thread is active and on the main core, we need to pick a core to move it to */
+				for (k = 0; k < spdk_env_get_core_count(); k++) {
+					if (g_next_lcore == SPDK_ENV_LCORE_ID_ANY) {
+						g_next_lcore = spdk_env_get_first_core();
+					}
+
+					target_lcore = g_next_lcore;
+					g_next_lcore = spdk_env_get_next_core(g_next_lcore);
+
+					if (spdk_cpuset_get_cpu(cpumask, target_lcore)) {
+						lw_thread->new_lcore = target_lcore;
+
+						if (target_lcore != g_main_lcore) {
+							busy_threads_present = true;
+							main_core_idle += spdk_min(UINT64_MAX - main_core_idle, thread_busy);
+							main_core_busy -= spdk_min(main_core_busy, thread_busy);
+						}
+
+						break;
+					}
+				}
+			} else if (i != g_main_lcore && load < SCHEDULER_LOAD_LIMIT) {
+				/* This thread is idle but not on the main core, so we need to move it to the main core */
 				lw_thread->new_lcore = g_main_lcore;
-				continue;
-			}
 
-			if (i != g_main_lcore) {
-				/* Do not move active thread if it is not on the main core */
-				continue;
-			}
-
-			/* Find a suitable reactor */
-			for (k = 0; k < spdk_env_get_core_count(); k++) {
-				if (g_next_lcore == SPDK_ENV_LCORE_ID_ANY) {
-					g_next_lcore = spdk_env_get_first_core();
-				}
-
-				target_lcore = g_next_lcore;
-				g_next_lcore = spdk_env_get_next_core(g_next_lcore);
-
-				if (spdk_cpuset_get_cpu(cpumask, target_lcore)) {
-					lw_thread->new_lcore = target_lcore;
-					break;
+				main_core_busy += spdk_min(UINT64_MAX - main_core_busy, thread_busy);
+				main_core_idle -= spdk_min(main_core_idle, thread_busy);
+			} else {
+				/* This thread should remain on the same core */
+				if (i != g_main_lcore) {
+					busy_threads_present = true;
 				}
 			}
 		}
 	}
+
+	if (!g_core_mngmnt_available) {
+		return;
+	}
+
+	/* Change main core frequency if needed */
+	if (busy_threads_present) {
+		rc = governor->set_core_freq_max(g_main_lcore);
+		if (rc < 0) {
+			SPDK_ERRLOG("setting default frequency for core %u failed\n", g_main_lcore);
+		}
+	} else if (main_core_busy > main_core_idle) {
+		rc = governor->core_freq_up(g_main_lcore);
+		if (rc < 0) {
+			SPDK_ERRLOG("increasing frequency for core %u failed\n", g_main_lcore);
+		}
+	} else {
+		rc = governor->core_freq_down(g_main_lcore);
+		if (rc < 0) {
+			SPDK_ERRLOG("lowering frequency for core %u failed\n", g_main_lcore);
+		}
+	}
 }
 
 static struct spdk_scheduler scheduler_dynamic = {
 	.name = "dynamic",
 	.init = init,
-	.deinit = NULL,
+	.deinit = deinit,
 	.balance = balance,
 };
 
diff --git a/test/blobstore/btest.out.ignore b/test/blobstore/btest.out.ignore
index 8a1df3d18..36fd001a4 100644
--- a/test/blobstore/btest.out.ignore
+++ b/test/blobstore/btest.out.ignore
@@ -3,3 +3,4 @@
 EAL
 Working
 cryptodev_aesni_mb_create
+POWER
diff --git a/test/unit/lib/event/reactor.c/reactor_ut.c b/test/unit/lib/event/reactor.c/reactor_ut.c
index 0ca51dd17..c1b5c4e8f 100644
--- a/test/unit/lib/event/reactor.c/reactor_ut.c
+++ b/test/unit/lib/event/reactor.c/reactor_ut.c
@@ -609,6 +609,236 @@ test_scheduler(void)
 	free_cores();
 }
 
+uint8_t g_curr_freq;
+
+static int
+core_freq_up(uint32_t lcore)
+{
+	if (g_curr_freq != UINT8_MAX) {
+		g_curr_freq++;
+	}
+
+	return 0;
+}
+
+static int
+core_freq_down(uint32_t lcore)
+{
+	if (g_curr_freq != 0) {
+		g_curr_freq--;
+	}
+
+	return 0;
+}
+
+static int
+core_freq_max(uint32_t lcore)
+{
+	g_curr_freq = UINT8_MAX;
+
+	return 0;
+}
+
+static struct spdk_governor governor = {
+	.name = "dpdk_governor",
+	.get_core_freqs = NULL,
+	.get_core_curr_freq = NULL,
+	.set_core_freq = NULL,
+	.core_freq_up = core_freq_up,
+	.core_freq_down = core_freq_down,
+	.set_core_freq_max = core_freq_max,
+	.set_core_freq_min = NULL,
+	.get_core_turbo_status = NULL,
+	.enable_core_turbo = NULL,
+	.disable_core_turbo = NULL,
+	.get_core_capabilities = NULL,
+	.init_core = NULL,
+	.deinit_core = NULL,
+	.init = NULL,
+	.deinit = NULL,
+};
+
+static void
+test_governor(void)
+{
+	struct spdk_cpuset cpuset = {};
+	struct spdk_thread *thread[2];
+	struct spdk_lw_thread *lw_thread;
+	struct spdk_reactor *reactor;
+	struct spdk_poller *busy, *idle;
+	uint8_t last_freq = 100;
+	int i;
+
+	MOCK_SET(spdk_env_get_current_core, 0);
+
+	g_curr_freq = last_freq;
+	_spdk_governor_list_add(&governor);
+
+	allocate_cores(2);
+
+	CU_ASSERT(spdk_reactors_init() == 0);
+
+	_spdk_scheduler_set("dynamic");
+
+	for (i = 0; i < 2; i++) {
+		spdk_cpuset_set_cpu(&g_reactor_core_mask, i, true);
+	}
+
+	/* Create threads. */
+	for (i = 0; i < 2; i++) {
+		spdk_cpuset_set_cpu(&cpuset, i, true);
+		thread[i] = spdk_thread_create(NULL, &cpuset);
+		CU_ASSERT(thread[i] != NULL);
+	}
+
+	for (i = 0; i < 2; i++) {
+		reactor = spdk_reactor_get(i);
+		CU_ASSERT(reactor != NULL);
+		MOCK_SET(spdk_env_get_current_core, i);
+		CU_ASSERT(event_queue_run_batch(reactor) == 1);
+		CU_ASSERT(!TAILQ_EMPTY(&reactor->threads));
+	}
+
+	reactor = spdk_reactor_get(0);
+	CU_ASSERT(reactor != NULL);
+	MOCK_SET(spdk_env_get_current_core, 0);
+
+	g_reactor_state = SPDK_REACTOR_STATE_RUNNING;
+
+	/* TEST 1 */
+	/* Init thread stats (low load) */
+	MOCK_SET(spdk_get_ticks, 100);
+	reactor->tsc_last = 100;
+
+	for (i = 0; i < 2; i++) {
+		spdk_set_thread(thread[i]);
+		idle = spdk_poller_register(poller_run_idle, (void *)200, 0);
+		reactor = spdk_reactor_get(i);
+		CU_ASSERT(reactor != NULL);
+		MOCK_SET(spdk_env_get_current_core, i);
+		_reactor_run(reactor);
+		spdk_poller_unregister(&idle);
+
+		/* Update last stats so that we don't have to call scheduler twice */
+		lw_thread = spdk_thread_get_ctx(thread[i]);
+		lw_thread->last_stats.idle_tsc = 1;
+	}
+
+	reactor = spdk_reactor_get(0);
+	CU_ASSERT(reactor != NULL);
+	MOCK_SET(spdk_env_get_current_core, 0);
+
+	_reactors_scheduler_gather_metrics(NULL, NULL);
+
+	/* Gather metrics for cores */
+	reactor = spdk_reactor_get(1);
+	CU_ASSERT(reactor != NULL);
+	MOCK_SET(spdk_env_get_current_core, 1);
+	CU_ASSERT(event_queue_run_batch(reactor) == 1);
+	reactor = spdk_reactor_get(0);
+	CU_ASSERT(reactor != NULL);
+	MOCK_SET(spdk_env_get_current_core, 0);
+	CU_ASSERT(event_queue_run_batch(reactor) == 1);
+
+	/* Threads were idle, so all of them should be placed on core 0 */
+	for (i = 0; i < 2; i++) {
+		reactor = spdk_reactor_get(i);
+		CU_ASSERT(reactor != NULL);
+		_reactor_run(reactor);
+	}
+
+	/* 1 thread should be scheduled to core 0 */
+	reactor = spdk_reactor_get(0);
+	CU_ASSERT(reactor != NULL);
+	MOCK_SET(spdk_env_get_current_core, 0);
+	CU_ASSERT(event_queue_run_batch(reactor) == 1);
+
+	/* Main core should be busy less than 50% time now - frequency should be lowered */
+	CU_ASSERT(g_curr_freq == last_freq - 1);
+
+	last_freq = g_curr_freq;
+
+	/* TEST 2 */
+	/* Make first threads busy - both threads will be still on core 0, but frequency will have to be raised */
+	spdk_set_thread(thread[0]);
+	busy = spdk_poller_register(poller_run_busy, (void *)1000, 0);
+	_reactor_run(reactor);
+	spdk_poller_unregister(&busy);
+
+	spdk_set_thread(thread[1]);
+	idle = spdk_poller_register(poller_run_idle, (void *)100, 0);
+	_reactor_run(reactor);
+	spdk_poller_unregister(&idle);
+
+	/* Run scheduler again */
+	_reactors_scheduler_gather_metrics(NULL, NULL);
+
+	/* Gather metrics */
+	reactor = spdk_reactor_get(1);
+	CU_ASSERT(reactor != NULL);
+	MOCK_SET(spdk_env_get_current_core, 1);
+	CU_ASSERT(event_queue_run_batch(reactor) == 1);
+	reactor = spdk_reactor_get(0);
+	CU_ASSERT(reactor != NULL);
+	MOCK_SET(spdk_env_get_current_core, 0);
+	CU_ASSERT(event_queue_run_batch(reactor) == 1);
+
+	/* Main core should be busy more than 50% time now - frequency should be raised */
+	CU_ASSERT(g_curr_freq == last_freq + 1);
+
+	/* TEST 3 */
+	/* Make second thread very busy so that it will be moved to second core */
+	spdk_set_thread(thread[1]);
+	busy = spdk_poller_register(poller_run_busy, (void *)1000, 0);
+	_reactor_run(reactor);
+	spdk_poller_unregister(&busy);
+
+	/* Update first thread stats */
+	spdk_set_thread(thread[0]);
+	idle = spdk_poller_register(poller_run_idle, (void *)100, 0);
+	_reactor_run(reactor);
+	spdk_poller_unregister(&idle);
+
+	/* Run scheduler again */
+	_reactors_scheduler_gather_metrics(NULL, NULL);
+
+	/* Gather metrics */
+	reactor = spdk_reactor_get(1);
+	CU_ASSERT(reactor != NULL);
+	MOCK_SET(spdk_env_get_current_core, 1);
+	CU_ASSERT(event_queue_run_batch(reactor) == 1);
+	reactor = spdk_reactor_get(0);
+	CU_ASSERT(reactor != NULL);
+	MOCK_SET(spdk_env_get_current_core, 0);
+	CU_ASSERT(event_queue_run_batch(reactor) == 1);
+
+	for (i = 0; i < 2; i++) {
+		reactor = spdk_reactor_get(i);
+		CU_ASSERT(reactor != NULL);
+		_reactor_run(reactor);
+	}
+
+	/* Main core frequency should be set to max when we have busy threads on other cores */
+	CU_ASSERT(g_curr_freq == UINT8_MAX);
+
+	g_reactor_state = SPDK_REACTOR_STATE_INITIALIZED;
+
+	/* Destroy threads */
+	for (i = 0; i < 2; i++) {
+		reactor = spdk_reactor_get(i);
+		CU_ASSERT(reactor != NULL);
+		reactor_run(reactor);
+	}
+
+	spdk_set_thread(NULL);
+
+	MOCK_CLEAR(spdk_env_get_current_core);
+
+	spdk_reactors_fini();
+
+	free_cores();
+}
+
 int
 main(int argc, char **argv)
 {
@@ -628,6 +858,7 @@ main(int argc, char **argv)
 	CU_ADD_TEST(suite, test_for_each_reactor);
 	CU_ADD_TEST(suite, test_reactor_stats);
 	CU_ADD_TEST(suite, test_scheduler);
+	CU_ADD_TEST(suite, test_governor);
 
 	CU_basic_set_mode(CU_BRM_VERBOSE);
 	CU_basic_run_tests();