The former code repeated the same event allocate-and-call sequence in many places, and only some of those call sites added an assert that the event was valid. To get correct reports in debug mode and catch errors at the actual failure point, encapsulate the sequence in a common helper function and put the assert there.

Signed-off-by: yidong0635 <dongx.yi@intel.com>
Change-Id: I23d71eac6652c4104ceff80419f39634ac5ce395
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14335
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
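For context, the consolidation looks roughly like this (an illustrative sketch, not an exact diff from the change). Before, call sites repeated the allocate-and-call sequence, with inconsistent asserts:

	struct spdk_event *ev;

	ev = spdk_event_allocate(lcore, fn, arg1, arg2);
	/* some call sites asserted ev here, some did not */
	spdk_event_call(ev);

After, they go through the common _event_call() helper defined below, which always asserts that the allocation succeeded:

	_event_call(lcore, fn, arg1, arg2);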
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 */

#include "spdk/stdinc.h"
#include "spdk/likely.h"

#include "spdk_internal/event.h"
#include "spdk_internal/usdt.h"

#include "spdk/log.h"
#include "spdk/thread.h"
#include "spdk/env.h"
#include "spdk/util.h"
#include "spdk/scheduler.h"
#include "spdk/string.h"
#include "spdk/fd_group.h"

#ifdef __linux__
#include <sys/prctl.h>
#include <sys/eventfd.h>
#endif

#ifdef __FreeBSD__
#include <pthread_np.h>
#endif

#define SPDK_EVENT_BATCH_SIZE 8

static struct spdk_reactor *g_reactors;
static uint32_t g_reactor_count;
static struct spdk_cpuset g_reactor_core_mask;
static enum spdk_reactor_state g_reactor_state = SPDK_REACTOR_STATE_UNINITIALIZED;

static bool g_framework_context_switch_monitor_enabled = true;

static struct spdk_mempool *g_spdk_event_mempool = NULL;

TAILQ_HEAD(, spdk_scheduler) g_scheduler_list
	= TAILQ_HEAD_INITIALIZER(g_scheduler_list);

static struct spdk_scheduler *g_scheduler = NULL;
static struct spdk_reactor *g_scheduling_reactor;
bool g_scheduling_in_progress = false;
static uint64_t g_scheduler_period = 0;
static uint32_t g_scheduler_core_number;
static struct spdk_scheduler_core_info *g_core_infos = NULL;

TAILQ_HEAD(, spdk_governor) g_governor_list
	= TAILQ_HEAD_INITIALIZER(g_governor_list);

static struct spdk_governor *g_governor = NULL;

static int reactor_interrupt_init(struct spdk_reactor *reactor);
static void reactor_interrupt_fini(struct spdk_reactor *reactor);

static pthread_mutex_t g_stopping_reactors_mtx = PTHREAD_MUTEX_INITIALIZER;
static bool g_stopping_reactors = false;

static struct spdk_scheduler *
_scheduler_find(const char *name)
{
	struct spdk_scheduler *tmp;

	TAILQ_FOREACH(tmp, &g_scheduler_list, link) {
		if (strcmp(name, tmp->name) == 0) {
			return tmp;
		}
	}

	return NULL;
}
int
spdk_scheduler_set(const char *name)
{
	struct spdk_scheduler *scheduler;
	int rc = 0;

	/* NULL scheduler was specifically requested */
	if (name == NULL) {
		if (g_scheduler) {
			g_scheduler->deinit();
		}
		g_scheduler = NULL;
		return 0;
	}

	scheduler = _scheduler_find(name);
	if (scheduler == NULL) {
		SPDK_ERRLOG("Requested scheduler is missing\n");
		return -EINVAL;
	}

	if (g_scheduler == scheduler) {
		return 0;
	}

	rc = scheduler->init();
	if (rc == 0) {
		if (g_scheduler) {
			g_scheduler->deinit();
		}
		g_scheduler = scheduler;
	}

	return rc;
}
struct spdk_scheduler *
spdk_scheduler_get(void)
{
	return g_scheduler;
}

uint64_t
spdk_scheduler_get_period(void)
{
	/* Convert from ticks to microseconds */
	return (g_scheduler_period * SPDK_SEC_TO_USEC / spdk_get_ticks_hz());
}

void
spdk_scheduler_set_period(uint64_t period)
{
	/* Convert microseconds to ticks */
	g_scheduler_period = period * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
}
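/*
 * Example (illustrative): the period passed to spdk_scheduler_set_period() is
 * in microseconds, so asking the framework to rebalance roughly once per
 * second looks like:
 *
 *	spdk_scheduler_set_period(SPDK_SEC_TO_USEC);
 *
 * Note that the round trip through spdk_scheduler_get_period() may lose
 * precision, since the value is stored internally in ticks.
 */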
void
spdk_scheduler_register(struct spdk_scheduler *scheduler)
{
	if (_scheduler_find(scheduler->name)) {
		SPDK_ERRLOG("scheduler named '%s' already registered.\n", scheduler->name);
		assert(false);
		return;
	}

	TAILQ_INSERT_TAIL(&g_scheduler_list, scheduler, link);
}
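/*
 * Illustrative registration sketch (not part of this file; the callback names
 * are hypothetical). A scheduler module fills out a struct spdk_scheduler with
 * the init/deinit/balance callbacks used above and registers it once:
 *
 *	static struct spdk_scheduler my_scheduler = {
 *		.name = "my_scheduler",
 *		.init = my_sched_init,
 *		.deinit = my_sched_deinit,
 *		.balance = my_sched_balance,
 *	};
 *
 *	spdk_scheduler_register(&my_scheduler);
 */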
static void
reactor_construct(struct spdk_reactor *reactor, uint32_t lcore)
{
	reactor->lcore = lcore;
	reactor->flags.is_valid = true;

	TAILQ_INIT(&reactor->threads);
	reactor->thread_count = 0;
	spdk_cpuset_zero(&reactor->notify_cpuset);

	reactor->events = spdk_ring_create(SPDK_RING_TYPE_MP_SC, 65536, SPDK_ENV_SOCKET_ID_ANY);
	if (reactor->events == NULL) {
		SPDK_ERRLOG("Failed to allocate events ring\n");
		assert(false);
	}

	/* Always initialize interrupt facilities for reactor */
	if (reactor_interrupt_init(reactor) != 0) {
		/* Reactor interrupt facilities are necessary if the app is set to interrupt mode. */
		if (spdk_interrupt_mode_is_enabled()) {
			SPDK_ERRLOG("Failed to prepare intr facilities\n");
			assert(false);
		}
		return;
	}

	/* If application runs with full interrupt ability,
	 * all reactors are going to run in interrupt mode.
	 */
	if (spdk_interrupt_mode_is_enabled()) {
		uint32_t i;

		SPDK_ENV_FOREACH_CORE(i) {
			spdk_cpuset_set_cpu(&reactor->notify_cpuset, i, true);
		}
		reactor->in_interrupt = true;
	}
}
struct spdk_reactor *
spdk_reactor_get(uint32_t lcore)
{
	struct spdk_reactor *reactor;

	if (g_reactors == NULL) {
		SPDK_WARNLOG("Called spdk_reactor_get() while the g_reactors array was NULL!\n");
		return NULL;
	}

	if (lcore >= g_reactor_count) {
		return NULL;
	}

	reactor = &g_reactors[lcore];

	if (reactor->flags.is_valid == false) {
		return NULL;
	}

	return reactor;
}
static int reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op);
static bool reactor_thread_op_supported(enum spdk_thread_op op);

int
spdk_reactors_init(size_t msg_mempool_size)
{
	struct spdk_reactor *reactor;
	int rc;
	uint32_t i, current_core;
	char mempool_name[32];

	snprintf(mempool_name, sizeof(mempool_name), "evtpool_%d", getpid());
	g_spdk_event_mempool = spdk_mempool_create(mempool_name,
			       262144 - 1, /* Power of 2 minus 1 is optimal for memory consumption */
			       sizeof(struct spdk_event),
			       SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
			       SPDK_ENV_SOCKET_ID_ANY);

	if (g_spdk_event_mempool == NULL) {
		SPDK_ERRLOG("spdk_event_mempool creation failed\n");
		return -1;
	}

	/* struct spdk_reactor must be aligned on 64 byte boundary */
	g_reactor_count = spdk_env_get_last_core() + 1;
	rc = posix_memalign((void **)&g_reactors, 64,
			    g_reactor_count * sizeof(struct spdk_reactor));
	if (rc != 0) {
		SPDK_ERRLOG("Could not allocate array size=%u for g_reactors\n",
			    g_reactor_count);
		spdk_mempool_free(g_spdk_event_mempool);
		return -1;
	}

	g_core_infos = calloc(g_reactor_count, sizeof(*g_core_infos));
	if (g_core_infos == NULL) {
		SPDK_ERRLOG("Could not allocate memory for g_core_infos\n");
		spdk_mempool_free(g_spdk_event_mempool);
		free(g_reactors);
		return -ENOMEM;
	}

	memset(g_reactors, 0, (g_reactor_count) * sizeof(struct spdk_reactor));

	rc = spdk_thread_lib_init_ext(reactor_thread_op, reactor_thread_op_supported,
				      sizeof(struct spdk_lw_thread), msg_mempool_size);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to initialize spdk thread lib\n");
		spdk_mempool_free(g_spdk_event_mempool);
		free(g_reactors);
		free(g_core_infos);
		return rc;
	}

	SPDK_ENV_FOREACH_CORE(i) {
		reactor_construct(&g_reactors[i], i);
	}

	current_core = spdk_env_get_current_core();
	reactor = spdk_reactor_get(current_core);
	assert(reactor != NULL);
	g_scheduling_reactor = reactor;

	g_reactor_state = SPDK_REACTOR_STATE_INITIALIZED;

	return 0;
}
void
spdk_reactors_fini(void)
{
	uint32_t i;
	struct spdk_reactor *reactor;

	if (g_reactor_state == SPDK_REACTOR_STATE_UNINITIALIZED) {
		return;
	}

	spdk_thread_lib_fini();

	SPDK_ENV_FOREACH_CORE(i) {
		reactor = spdk_reactor_get(i);
		assert(reactor != NULL);
		assert(reactor->thread_count == 0);
		if (reactor->events != NULL) {
			spdk_ring_free(reactor->events);
		}

		reactor_interrupt_fini(reactor);

		if (g_core_infos != NULL) {
			free(g_core_infos[i].thread_infos);
		}
	}

	spdk_mempool_free(g_spdk_event_mempool);

	free(g_reactors);
	g_reactors = NULL;
	free(g_core_infos);
	g_core_infos = NULL;
}
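/*
 * Illustrative lifecycle sketch: how these entry points fit together. Real
 * applications normally go through the higher-level app framework (e.g.
 * spdk_app_start()) rather than calling these directly.
 *
 *	spdk_reactors_init(msg_mempool_size);
 *	spdk_reactors_start();	// blocks until spdk_reactors_stop() runs
 *	spdk_reactors_fini();
 */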
static void _reactor_set_interrupt_mode(void *arg1, void *arg2);

static void
_reactor_set_notify_cpuset(void *arg1, void *arg2)
{
	struct spdk_reactor *target = arg1;
	struct spdk_reactor *reactor = spdk_reactor_get(spdk_env_get_current_core());

	assert(reactor != NULL);
	spdk_cpuset_set_cpu(&reactor->notify_cpuset, target->lcore, target->new_in_interrupt);
}

static void
_event_call(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
{
	struct spdk_event *ev;

	ev = spdk_event_allocate(lcore, fn, arg1, arg2);
	assert(ev);
	spdk_event_call(ev);
}
static void
_reactor_set_notify_cpuset_cpl(void *arg1, void *arg2)
{
	struct spdk_reactor *target = arg1;

	if (target->new_in_interrupt == false) {
		target->set_interrupt_mode_in_progress = false;
		spdk_thread_send_msg(_spdk_get_app_thread(), target->set_interrupt_mode_cb_fn,
				     target->set_interrupt_mode_cb_arg);
	} else {
		_event_call(target->lcore, _reactor_set_interrupt_mode, target, NULL);
	}
}

static void
_reactor_set_thread_interrupt_mode(void *ctx)
{
	struct spdk_reactor *reactor = ctx;

	spdk_thread_set_interrupt_mode(reactor->in_interrupt);
}

static void
_reactor_set_interrupt_mode(void *arg1, void *arg2)
{
	struct spdk_reactor *target = arg1;
	struct spdk_thread *thread;
	struct spdk_lw_thread *lw_thread, *tmp;

	assert(target == spdk_reactor_get(spdk_env_get_current_core()));
	assert(target != NULL);
	assert(target->in_interrupt != target->new_in_interrupt);
	SPDK_DEBUGLOG(reactor, "Do reactor set on core %u from %s to state %s\n",
		      target->lcore, target->in_interrupt ? "intr" : "poll",
		      target->new_in_interrupt ? "intr" : "poll");

	target->in_interrupt = target->new_in_interrupt;

	/* Align spdk_thread with reactor to interrupt mode or poll mode */
	TAILQ_FOREACH_SAFE(lw_thread, &target->threads, link, tmp) {
		thread = spdk_thread_get_from_ctx(lw_thread);
		spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, target);
	}

	if (target->new_in_interrupt == false) {
		/* Reactor is no longer in interrupt mode. Refresh the tsc_last to accurately
		 * track reactor stats. */
		target->tsc_last = spdk_get_ticks();
		spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl);
	} else {
		uint64_t notify = 1;
		int rc = 0;

		/* Always trigger spdk_event and resched event in case of race condition */
		rc = write(target->events_fd, &notify, sizeof(notify));
		if (rc < 0) {
			SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
		}
		rc = write(target->resched_fd, &notify, sizeof(notify));
		if (rc < 0) {
			SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno));
		}

		target->set_interrupt_mode_in_progress = false;
		spdk_thread_send_msg(_spdk_get_app_thread(), target->set_interrupt_mode_cb_fn,
				     target->set_interrupt_mode_cb_arg);
	}
}
int
spdk_reactor_set_interrupt_mode(uint32_t lcore, bool new_in_interrupt,
				spdk_reactor_set_interrupt_mode_cb cb_fn, void *cb_arg)
{
	struct spdk_reactor *target;

	target = spdk_reactor_get(lcore);
	if (target == NULL) {
		return -EINVAL;
	}

	/* Eventfd has to be supported in order to use interrupt functionality. */
	if (target->fgrp == NULL) {
		return -ENOTSUP;
	}

	if (spdk_get_thread() != _spdk_get_app_thread()) {
		SPDK_ERRLOG("This is only permitted from within the SPDK application thread.\n");
		return -EPERM;
	}

	if (target->in_interrupt == new_in_interrupt) {
		cb_fn(cb_arg);
		return 0;
	}

	if (target->set_interrupt_mode_in_progress) {
		SPDK_NOTICELOG("Reactor(%u) already has an interrupt mode change in progress\n", lcore);
		return -EBUSY;
	}
	target->set_interrupt_mode_in_progress = true;

	target->new_in_interrupt = new_in_interrupt;
	target->set_interrupt_mode_cb_fn = cb_fn;
	target->set_interrupt_mode_cb_arg = cb_arg;

	SPDK_DEBUGLOG(reactor, "Starting reactor event from %d to %d\n",
		      spdk_env_get_current_core(), lcore);

	if (new_in_interrupt == false) {
		/* For potential race cases, when setting the reactor to poll mode,
		 * first change the mode of the reactor and then clear the corresponding
		 * bit of the notify_cpuset of each reactor.
		 */
		_event_call(lcore, _reactor_set_interrupt_mode, target, NULL);
	} else {
		/* For race cases, when setting the reactor to interrupt mode, first set the
		 * corresponding bit of the notify_cpuset of each reactor and then change the mode.
		 */
		spdk_for_each_reactor(_reactor_set_notify_cpuset, target, NULL, _reactor_set_notify_cpuset_cpl);
	}

	return 0;
}
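/*
 * Illustrative usage sketch for spdk_reactor_set_interrupt_mode() (the
 * callback name is hypothetical). It must be called from the SPDK
 * application thread:
 *
 *	static void
 *	set_intr_done(void *cb_arg)
 *	{
 *		SPDK_NOTICELOG("reactor mode change completed\n");
 *	}
 *
 *	rc = spdk_reactor_set_interrupt_mode(lcore, true, set_intr_done, NULL);
 *	if (rc != 0) {
 *		// -EINVAL, -ENOTSUP, -EPERM or -EBUSY, see above
 *	}
 */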
struct spdk_event *
spdk_event_allocate(uint32_t lcore, spdk_event_fn fn, void *arg1, void *arg2)
{
	struct spdk_event *event = NULL;
	struct spdk_reactor *reactor = spdk_reactor_get(lcore);

	if (!reactor) {
		assert(false);
		return NULL;
	}

	event = spdk_mempool_get(g_spdk_event_mempool);
	if (event == NULL) {
		assert(false);
		return NULL;
	}

	event->lcore = lcore;
	event->fn = fn;
	event->arg1 = arg1;
	event->arg2 = arg2;

	return event;
}

void
spdk_event_call(struct spdk_event *event)
{
	int rc;
	struct spdk_reactor *reactor;
	struct spdk_reactor *local_reactor = NULL;
	uint32_t current_core = spdk_env_get_current_core();

	reactor = spdk_reactor_get(event->lcore);

	assert(reactor != NULL);
	assert(reactor->events != NULL);

	rc = spdk_ring_enqueue(reactor->events, (void **)&event, 1, NULL);
	if (rc != 1) {
		assert(false);
	}

	if (current_core != SPDK_ENV_LCORE_ID_ANY) {
		local_reactor = spdk_reactor_get(current_core);
	}

	/* If spdk_event_call isn't called on a reactor, always send a notification.
	 * If it is called on a reactor, send a notification if the destination reactor
	 * is marked as being in interrupt mode.
	 */
	if (spdk_unlikely(local_reactor == NULL) ||
	    spdk_unlikely(spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, event->lcore))) {
		uint64_t notify = 1;

		rc = write(reactor->events_fd, &notify, sizeof(notify));
		if (rc < 0) {
			SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
		}
	}
}
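/*
 * Note on ownership: spdk_event_call() consumes the event. Events are
 * returned to g_spdk_event_mempool by event_queue_run_batch() after the
 * callback runs, so callers never free them explicitly.
 */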
static inline int
event_queue_run_batch(void *arg)
{
	struct spdk_reactor *reactor = arg;
	size_t count, i;
	void *events[SPDK_EVENT_BATCH_SIZE];
	struct spdk_thread *thread;
	struct spdk_lw_thread *lw_thread;

#ifdef DEBUG
	/*
	 * spdk_ring_dequeue() fills events and returns how many entries it wrote,
	 * so we will never actually read uninitialized data from events, but just to be sure
	 * (and to silence a static analyzer false positive), initialize the array to NULL pointers.
	 */
	memset(events, 0, sizeof(events));
#endif

	/* Operate event notification if this reactor currently runs in interrupt state */
	if (spdk_unlikely(reactor->in_interrupt)) {
		uint64_t notify = 1;
		int rc;

		/* There may be a race between event_acknowledge and another producer's
		 * event_notify, so acknowledge first and then check for this reactor's own
		 * event_notify. This avoids missing an event notification.
		 */
		rc = read(reactor->events_fd, &notify, sizeof(notify));
		if (rc < 0) {
			SPDK_ERRLOG("failed to acknowledge event queue: %s.\n", spdk_strerror(errno));
			return -errno;
		}

		count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);

		if (spdk_ring_count(reactor->events) != 0) {
			/* Trigger new notification if there are still events in event-queue waiting for processing. */
			rc = write(reactor->events_fd, &notify, sizeof(notify));
			if (rc < 0) {
				SPDK_ERRLOG("failed to notify event queue: %s.\n", spdk_strerror(errno));
				return -errno;
			}
		}
	} else {
		count = spdk_ring_dequeue(reactor->events, events, SPDK_EVENT_BATCH_SIZE);
	}

	if (count == 0) {
		return 0;
	}

	/* Execute the events. There are still some remaining events
	 * that must occur on an SPDK thread. To accommodate those, try to
	 * run them on the first thread in the list, if it exists. */
	lw_thread = TAILQ_FIRST(&reactor->threads);
	if (lw_thread) {
		thread = spdk_thread_get_from_ctx(lw_thread);
	} else {
		thread = NULL;
	}

	for (i = 0; i < count; i++) {
		struct spdk_event *event = events[i];

		assert(event != NULL);
		spdk_set_thread(thread);

		SPDK_DTRACE_PROBE3(event_exec, event->fn,
				   event->arg1, event->arg2);
		event->fn(event->arg1, event->arg2);
		spdk_set_thread(NULL);
	}

	spdk_mempool_put_bulk(g_spdk_event_mempool, events, count);

	return (int)count;
}
/* 1s */
#define CONTEXT_SWITCH_MONITOR_PERIOD 1000000

static int
get_rusage(struct spdk_reactor *reactor)
{
	struct rusage rusage;

	if (getrusage(RUSAGE_THREAD, &rusage) != 0) {
		return -1;
	}

	if (rusage.ru_nvcsw != reactor->rusage.ru_nvcsw || rusage.ru_nivcsw != reactor->rusage.ru_nivcsw) {
		SPDK_INFOLOG(reactor,
			     "Reactor %d: %ld voluntary context switches and %ld involuntary context switches in the last second.\n",
			     reactor->lcore, rusage.ru_nvcsw - reactor->rusage.ru_nvcsw,
			     rusage.ru_nivcsw - reactor->rusage.ru_nivcsw);
	}
	reactor->rusage = rusage;

	return -1;
}
void
spdk_framework_enable_context_switch_monitor(bool enable)
{
	/* This global is being read by multiple threads, so this isn't
	 * strictly thread safe. However, we're toggling between true and
	 * false here, and if a thread sees the value update later than it
	 * should, it's no big deal. */
	g_framework_context_switch_monitor_enabled = enable;
}

bool
spdk_framework_context_switch_monitor_enabled(void)
{
	return g_framework_context_switch_monitor_enabled;
}

static void
_set_thread_name(const char *thread_name)
{
#if defined(__linux__)
	prctl(PR_SET_NAME, thread_name, 0, 0, 0);
#elif defined(__FreeBSD__)
	pthread_set_name_np(pthread_self(), thread_name);
#else
	pthread_setname_np(pthread_self(), thread_name);
#endif
}
static void
_init_thread_stats(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
{
	struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
	struct spdk_thread_stats prev_total_stats;

	/* Read total_stats before updating it to calculate stats during the last scheduling period. */
	prev_total_stats = lw_thread->total_stats;

	spdk_set_thread(thread);
	spdk_thread_get_stats(&lw_thread->total_stats);
	spdk_set_thread(NULL);

	lw_thread->current_stats.busy_tsc = lw_thread->total_stats.busy_tsc - prev_total_stats.busy_tsc;
	lw_thread->current_stats.idle_tsc = lw_thread->total_stats.idle_tsc - prev_total_stats.idle_tsc;
}

static void
_threads_reschedule_thread(struct spdk_scheduler_thread_info *thread_info)
{
	struct spdk_lw_thread *lw_thread;
	struct spdk_thread *thread;

	thread = spdk_thread_get_by_id(thread_info->thread_id);
	if (thread == NULL) {
		/* Thread no longer exists. */
		return;
	}
	lw_thread = spdk_thread_get_ctx(thread);
	assert(lw_thread != NULL);

	lw_thread->lcore = thread_info->lcore;
	lw_thread->resched = true;
}
static void
_threads_reschedule(struct spdk_scheduler_core_info *cores_info)
{
	struct spdk_scheduler_core_info *core;
	struct spdk_scheduler_thread_info *thread_info;
	uint32_t i, j;

	SPDK_ENV_FOREACH_CORE(i) {
		core = &cores_info[i];
		for (j = 0; j < core->threads_count; j++) {
			thread_info = &core->thread_infos[j];
			if (thread_info->lcore != i) {
				_threads_reschedule_thread(thread_info);
			}
		}
		core->threads_count = 0;
		free(core->thread_infos);
		core->thread_infos = NULL;
	}
}

static void
_reactors_scheduler_fini(void)
{
	/* Reschedule based on the balancing output */
	_threads_reschedule(g_core_infos);

	g_scheduling_in_progress = false;
}

static void
_reactors_scheduler_update_core_mode(void *ctx)
{
	struct spdk_reactor *reactor;
	uint32_t i;
	int rc = 0;

	for (i = g_scheduler_core_number; i < SPDK_ENV_LCORE_ID_ANY; i = spdk_env_get_next_core(i)) {
		reactor = spdk_reactor_get(i);
		assert(reactor != NULL);
		if (reactor->in_interrupt != g_core_infos[i].interrupt_mode) {
			/* Switch next found reactor to new state */
			rc = spdk_reactor_set_interrupt_mode(i, g_core_infos[i].interrupt_mode,
							     _reactors_scheduler_update_core_mode, NULL);
			if (rc == 0) {
				/* Set core to start with after callback completes */
				g_scheduler_core_number = spdk_env_get_next_core(i);
				return;
			}
		}
	}
	_reactors_scheduler_fini();
}
static void
_reactors_scheduler_cancel(void *arg1, void *arg2)
{
	struct spdk_scheduler_core_info *core;
	uint32_t i;

	SPDK_ENV_FOREACH_CORE(i) {
		core = &g_core_infos[i];
		core->threads_count = 0;
		free(core->thread_infos);
		core->thread_infos = NULL;
	}

	g_scheduling_in_progress = false;
}

static void
_reactors_scheduler_balance(void *arg1, void *arg2)
{
	struct spdk_scheduler *scheduler = spdk_scheduler_get();

	if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING || scheduler == NULL) {
		_reactors_scheduler_cancel(NULL, NULL);
		return;
	}

	scheduler->balance(g_core_infos, g_reactor_count);

	g_scheduler_core_number = spdk_env_get_first_core();
	_reactors_scheduler_update_core_mode(NULL);
}

/* Phase 1 of thread scheduling is to gather metrics on the existing threads */
static void
_reactors_scheduler_gather_metrics(void *arg1, void *arg2)
{
	struct spdk_scheduler_core_info *core_info;
	struct spdk_lw_thread *lw_thread;
	struct spdk_thread *thread;
	struct spdk_reactor *reactor;
	uint32_t next_core;
	uint32_t i = 0;

	reactor = spdk_reactor_get(spdk_env_get_current_core());
	assert(reactor != NULL);
	core_info = &g_core_infos[reactor->lcore];
	core_info->lcore = reactor->lcore;
	core_info->current_idle_tsc = reactor->idle_tsc - core_info->total_idle_tsc;
	core_info->total_idle_tsc = reactor->idle_tsc;
	core_info->current_busy_tsc = reactor->busy_tsc - core_info->total_busy_tsc;
	core_info->total_busy_tsc = reactor->busy_tsc;
	core_info->interrupt_mode = reactor->in_interrupt;
	core_info->threads_count = 0;

	SPDK_DEBUGLOG(reactor, "Gathering metrics on %u\n", reactor->lcore);

	if (reactor->thread_count > 0) {
		core_info->thread_infos = calloc(reactor->thread_count, sizeof(*core_info->thread_infos));
		if (core_info->thread_infos == NULL) {
			SPDK_ERRLOG("Failed to allocate memory when gathering metrics on %u\n", reactor->lcore);

			/* Cancel this round of schedule work */
			_event_call(g_scheduling_reactor->lcore, _reactors_scheduler_cancel, NULL, NULL);
			return;
		}

		TAILQ_FOREACH(lw_thread, &reactor->threads, link) {
			_init_thread_stats(reactor, lw_thread);

			core_info->thread_infos[i].lcore = lw_thread->lcore;
			thread = spdk_thread_get_from_ctx(lw_thread);
			assert(thread != NULL);
			core_info->thread_infos[i].thread_id = spdk_thread_get_id(thread);
			core_info->thread_infos[i].total_stats = lw_thread->total_stats;
			core_info->thread_infos[i].current_stats = lw_thread->current_stats;
			core_info->threads_count++;
			assert(core_info->threads_count <= reactor->thread_count);
			i++;
		}
	}

	next_core = spdk_env_get_next_core(reactor->lcore);
	if (next_core == UINT32_MAX) {
		next_core = spdk_env_get_first_core();
	}

	/* If we've looped back around to the scheduler thread, move to the next phase */
	if (next_core == g_scheduling_reactor->lcore) {
		/* Phase 2 of scheduling is rebalancing - deciding which threads to move where */
		_event_call(next_core, _reactors_scheduler_balance, NULL, NULL);
		return;
	}

	_event_call(next_core, _reactors_scheduler_gather_metrics, NULL, NULL);
}
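/*
 * For reference, a minimal (hypothetical) balance callback that a custom
 * scheduler could supply: it leaves every thread on the core it already
 * occupies, so _threads_reschedule() sees no lcore changes and moves nothing.
 *
 *	static void
 *	noop_balance(struct spdk_scheduler_core_info *cores, uint32_t core_count)
 *	{
 *		uint32_t i, j;
 *
 *		SPDK_ENV_FOREACH_CORE(i) {
 *			for (j = 0; j < cores[i].threads_count; j++) {
 *				cores[i].thread_infos[j].lcore = i;
 *			}
 *		}
 *	}
 */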
static int _reactor_schedule_thread(struct spdk_thread *thread);
static uint64_t g_rusage_period;

static void
_reactor_remove_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
{
	struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);
	int efd;

	TAILQ_REMOVE(&reactor->threads, lw_thread, link);
	assert(reactor->thread_count > 0);
	reactor->thread_count--;

	/* Operate thread intr if running with full interrupt ability */
	if (spdk_interrupt_mode_is_enabled()) {
		efd = spdk_thread_get_interrupt_fd(thread);
		spdk_fd_group_remove(reactor->fgrp, efd);
	}
}

static bool
reactor_post_process_lw_thread(struct spdk_reactor *reactor, struct spdk_lw_thread *lw_thread)
{
	struct spdk_thread *thread = spdk_thread_get_from_ctx(lw_thread);

	if (spdk_unlikely(spdk_thread_is_exited(thread) &&
			  spdk_thread_is_idle(thread))) {
		_reactor_remove_lw_thread(reactor, lw_thread);
		spdk_thread_destroy(thread);
		return true;
	}

	if (spdk_unlikely(lw_thread->resched)) {
		lw_thread->resched = false;
		_reactor_remove_lw_thread(reactor, lw_thread);
		_reactor_schedule_thread(thread);
		return true;
	}

	return false;
}
static void
reactor_interrupt_run(struct spdk_reactor *reactor)
{
	int block_timeout = -1; /* _EPOLL_WAIT_FOREVER */

	spdk_fd_group_wait(reactor->fgrp, block_timeout);
}

static void
_reactor_run(struct spdk_reactor *reactor)
{
	struct spdk_thread *thread;
	struct spdk_lw_thread *lw_thread, *tmp;
	uint64_t now;
	int rc;

	event_queue_run_batch(reactor);

	/* If no threads are present on the reactor,
	 * tsc_last gets outdated. Update it to track
	 * thread execution time correctly. */
	if (spdk_unlikely(TAILQ_EMPTY(&reactor->threads))) {
		now = spdk_get_ticks();
		reactor->idle_tsc += now - reactor->tsc_last;
		reactor->tsc_last = now;
		return;
	}

	TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
		thread = spdk_thread_get_from_ctx(lw_thread);
		rc = spdk_thread_poll(thread, 0, reactor->tsc_last);

		now = spdk_thread_get_last_tsc(thread);
		if (rc == 0) {
			reactor->idle_tsc += now - reactor->tsc_last;
		} else if (rc > 0) {
			reactor->busy_tsc += now - reactor->tsc_last;
		}
		reactor->tsc_last = now;

		reactor_post_process_lw_thread(reactor, lw_thread);
	}
}
static int
reactor_run(void *arg)
{
	struct spdk_reactor *reactor = arg;
	struct spdk_thread *thread;
	struct spdk_lw_thread *lw_thread, *tmp;
	char thread_name[32];
	uint64_t last_sched = 0;

	SPDK_NOTICELOG("Reactor started on core %u\n", reactor->lcore);

	/* Rename the POSIX thread because the reactor is tied to the POSIX
	 * thread in the SPDK event library.
	 */
	snprintf(thread_name, sizeof(thread_name), "reactor_%u", reactor->lcore);
	_set_thread_name(thread_name);

	reactor->tsc_last = spdk_get_ticks();

	while (1) {
		/* Execute interrupt process fn if this reactor currently runs in interrupt state */
		if (spdk_unlikely(reactor->in_interrupt)) {
			reactor_interrupt_run(reactor);
		} else {
			_reactor_run(reactor);
		}

		if (g_framework_context_switch_monitor_enabled) {
			if ((reactor->last_rusage + g_rusage_period) < reactor->tsc_last) {
				get_rusage(reactor);
				reactor->last_rusage = reactor->tsc_last;
			}
		}

		if (spdk_unlikely(g_scheduler_period > 0 &&
				  (reactor->tsc_last - last_sched) > g_scheduler_period &&
				  reactor == g_scheduling_reactor &&
				  !g_scheduling_in_progress)) {
			last_sched = reactor->tsc_last;
			g_scheduling_in_progress = true;
			_reactors_scheduler_gather_metrics(NULL, NULL);
		}

		if (g_reactor_state != SPDK_REACTOR_STATE_RUNNING) {
			break;
		}
	}

	TAILQ_FOREACH(lw_thread, &reactor->threads, link) {
		thread = spdk_thread_get_from_ctx(lw_thread);
		spdk_set_thread(thread);
		spdk_thread_exit(thread);
	}

	while (!TAILQ_EMPTY(&reactor->threads)) {
		TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
			thread = spdk_thread_get_from_ctx(lw_thread);
			spdk_set_thread(thread);
			if (spdk_thread_is_exited(thread)) {
				_reactor_remove_lw_thread(reactor, lw_thread);
				spdk_thread_destroy(thread);
			} else {
				spdk_thread_poll(thread, 0, 0);
			}
		}
	}

	return 0;
}
int
spdk_app_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
{
	int ret;
	const struct spdk_cpuset *validmask;

	ret = spdk_cpuset_parse(cpumask, mask);
	if (ret < 0) {
		return ret;
	}

	validmask = spdk_app_get_core_mask();
	spdk_cpuset_and(cpumask, validmask);

	return 0;
}

const struct spdk_cpuset *
spdk_app_get_core_mask(void)
{
	return &g_reactor_core_mask;
}
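/*
 * Illustrative usage sketch: parse a user-supplied mask and intersect it with
 * the cores the application was actually started on.
 *
 *	struct spdk_cpuset cpumask;
 *
 *	if (spdk_app_parse_core_mask("0x3", &cpumask) != 0) {
 *		SPDK_ERRLOG("invalid core mask\n");
 *	}
 */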
void
spdk_reactors_start(void)
{
	struct spdk_reactor *reactor;
	uint32_t i, current_core;
	int rc;

	g_rusage_period = (CONTEXT_SWITCH_MONITOR_PERIOD * spdk_get_ticks_hz()) / SPDK_SEC_TO_USEC;
	g_reactor_state = SPDK_REACTOR_STATE_RUNNING;
	/* Reinitialize to false, in case the app framework is restarting in the same process. */
	g_stopping_reactors = false;

	current_core = spdk_env_get_current_core();
	SPDK_ENV_FOREACH_CORE(i) {
		if (i != current_core) {
			reactor = spdk_reactor_get(i);
			if (reactor == NULL) {
				continue;
			}

			rc = spdk_env_thread_launch_pinned(reactor->lcore, reactor_run, reactor);
			if (rc < 0) {
				SPDK_ERRLOG("Unable to start reactor thread on core %u\n", reactor->lcore);
				assert(false);
				return;
			}
		}
		spdk_cpuset_set_cpu(&g_reactor_core_mask, i, true);
	}

	/* Start the main reactor */
	reactor = spdk_reactor_get(current_core);
	assert(reactor != NULL);
	reactor_run(reactor);

	spdk_env_thread_wait_all();

	g_reactor_state = SPDK_REACTOR_STATE_SHUTDOWN;
}
static void
_reactors_stop(void *arg1, void *arg2)
{
	uint32_t i;
	int rc;
	struct spdk_reactor *reactor;
	struct spdk_reactor *local_reactor;
	uint64_t notify = 1;

	g_reactor_state = SPDK_REACTOR_STATE_EXITING;
	local_reactor = spdk_reactor_get(spdk_env_get_current_core());

	SPDK_ENV_FOREACH_CORE(i) {
		/* If spdk_event_call isn't called on a reactor, always send a notification.
		 * If it is called on a reactor, send a notification if the destination reactor
		 * is marked as being in interrupt mode.
		 */
		if (local_reactor == NULL || spdk_cpuset_get_cpu(&local_reactor->notify_cpuset, i)) {
			reactor = spdk_reactor_get(i);
			assert(reactor != NULL);
			rc = write(reactor->events_fd, &notify, sizeof(notify));
			if (rc < 0) {
				SPDK_ERRLOG("failed to notify event queue for reactor(%u): %s.\n", i, spdk_strerror(errno));
				continue;
			}
		}
	}
}

static void
nop(void *arg1, void *arg2)
{
}

void
spdk_reactors_stop(void *arg1)
{
	spdk_for_each_reactor(nop, NULL, NULL, _reactors_stop);
}

static pthread_mutex_t g_scheduler_mtx = PTHREAD_MUTEX_INITIALIZER;
static uint32_t g_next_core = UINT32_MAX;
static int
thread_process_interrupts(void *arg)
{
	struct spdk_thread *thread = arg;
	struct spdk_reactor *reactor = spdk_reactor_get(spdk_env_get_current_core());
	uint64_t now;
	int rc;

	assert(reactor != NULL);

	/* Update idle_tsc between the end of last intr_fn and the start of this intr_fn. */
	now = spdk_get_ticks();
	reactor->idle_tsc += now - reactor->tsc_last;
	reactor->tsc_last = now;

	rc = spdk_thread_poll(thread, 0, now);

	/* Update tsc between the start and the end of this intr_fn. */
	now = spdk_thread_get_last_tsc(thread);
	if (rc == 0) {
		reactor->idle_tsc += now - reactor->tsc_last;
	} else if (rc > 0) {
		reactor->busy_tsc += now - reactor->tsc_last;
	}
	reactor->tsc_last = now;

	return rc;
}
static void
_schedule_thread(void *arg1, void *arg2)
{
	struct spdk_lw_thread *lw_thread = arg1;
	struct spdk_thread *thread;
	struct spdk_reactor *reactor;
	uint32_t current_core;
	int efd;

	current_core = spdk_env_get_current_core();
	reactor = spdk_reactor_get(current_core);
	assert(reactor != NULL);

	/* Update total_stats to reflect state of thread
	 * at the end of the move. */
	thread = spdk_thread_get_from_ctx(lw_thread);
	spdk_set_thread(thread);
	spdk_thread_get_stats(&lw_thread->total_stats);
	spdk_set_thread(NULL);

	lw_thread->lcore = current_core;

	TAILQ_INSERT_TAIL(&reactor->threads, lw_thread, link);
	reactor->thread_count++;

	/* Operate thread intr if running with full interrupt ability */
	if (spdk_interrupt_mode_is_enabled()) {
		int rc;

		efd = spdk_thread_get_interrupt_fd(thread);
		rc = SPDK_FD_GROUP_ADD(reactor->fgrp, efd,
				       thread_process_interrupts, thread);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to schedule spdk_thread: %s.\n", spdk_strerror(-rc));
		}

		/* Align spdk_thread with reactor to interrupt mode or poll mode */
		spdk_thread_send_msg(thread, _reactor_set_thread_interrupt_mode, reactor);
	}
}
static int
_reactor_schedule_thread(struct spdk_thread *thread)
{
	uint32_t core;
	struct spdk_lw_thread *lw_thread;
	struct spdk_event *evt = NULL;
	struct spdk_cpuset *cpumask;
	uint32_t i;
	struct spdk_reactor *local_reactor = NULL;
	uint32_t current_lcore = spdk_env_get_current_core();
	struct spdk_cpuset polling_cpumask;
	struct spdk_cpuset valid_cpumask;

	cpumask = spdk_thread_get_cpumask(thread);

	lw_thread = spdk_thread_get_ctx(thread);
	assert(lw_thread != NULL);
	core = lw_thread->lcore;
	memset(lw_thread, 0, sizeof(*lw_thread));

	if (current_lcore != SPDK_ENV_LCORE_ID_ANY) {
		local_reactor = spdk_reactor_get(current_lcore);
		assert(local_reactor);
	}

	/* When interrupt ability of spdk_thread is not enabled and the current
	 * reactor runs on a DPDK thread, skip reactors which are in interrupt mode.
	 */
	if (!spdk_interrupt_mode_is_enabled() && local_reactor != NULL) {
		/* Get the cpumask of all reactors in polling */
		spdk_cpuset_zero(&polling_cpumask);
		SPDK_ENV_FOREACH_CORE(i) {
			spdk_cpuset_set_cpu(&polling_cpumask, i, true);
		}
		spdk_cpuset_xor(&polling_cpumask, &local_reactor->notify_cpuset);

		if (core == SPDK_ENV_LCORE_ID_ANY) {
			/* Get the cpumask of all valid reactors which are suggested and also in polling */
			spdk_cpuset_copy(&valid_cpumask, &polling_cpumask);
			spdk_cpuset_and(&valid_cpumask, spdk_thread_get_cpumask(thread));

			/* If there are any valid reactors, spdk_thread should be scheduled
			 * into one of the valid reactors.
			 * If there are no valid reactors, spdk_thread should be scheduled
			 * into one of the polling reactors.
			 */
			if (spdk_cpuset_count(&valid_cpumask) != 0) {
				cpumask = &valid_cpumask;
			} else {
				cpumask = &polling_cpumask;
			}
		} else if (!spdk_cpuset_get_cpu(&polling_cpumask, core)) {
			/* If the specified reactor is not in polling, spdk_thread should be scheduled
			 * into one of the polling reactors.
			 */
			core = SPDK_ENV_LCORE_ID_ANY;
			cpumask = &polling_cpumask;
		}
	}

	pthread_mutex_lock(&g_scheduler_mtx);
	if (core == SPDK_ENV_LCORE_ID_ANY) {
		for (i = 0; i < spdk_env_get_core_count(); i++) {
			if (g_next_core >= g_reactor_count) {
				g_next_core = spdk_env_get_first_core();
			}
			core = g_next_core;
			g_next_core = spdk_env_get_next_core(g_next_core);

			if (spdk_cpuset_get_cpu(cpumask, core)) {
				break;
			}
		}
	}

	evt = spdk_event_allocate(core, _schedule_thread, lw_thread, NULL);

	pthread_mutex_unlock(&g_scheduler_mtx);

	assert(evt != NULL);
	if (evt == NULL) {
		SPDK_ERRLOG("Unable to schedule thread on requested core mask.\n");
		return -1;
	}

	lw_thread->tsc_start = spdk_get_ticks();

	spdk_event_call(evt);

	return 0;
}
static void
_reactor_request_thread_reschedule(struct spdk_thread *thread)
{
	struct spdk_lw_thread *lw_thread;
	struct spdk_reactor *reactor;
	uint32_t current_core;

	assert(thread == spdk_get_thread());

	lw_thread = spdk_thread_get_ctx(thread);

	assert(lw_thread != NULL);
	lw_thread->resched = true;
	lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY;

	current_core = spdk_env_get_current_core();
	reactor = spdk_reactor_get(current_core);
	assert(reactor != NULL);

	/* Send a notification if the destination reactor is marked as being in interrupt mode */
	if (spdk_unlikely(spdk_cpuset_get_cpu(&reactor->notify_cpuset, reactor->lcore))) {
		uint64_t notify = 1;

		if (write(reactor->resched_fd, &notify, sizeof(notify)) < 0) {
			SPDK_ERRLOG("failed to notify reschedule: %s.\n", spdk_strerror(errno));
		}
	}
}

static int
reactor_thread_op(struct spdk_thread *thread, enum spdk_thread_op op)
{
	struct spdk_lw_thread *lw_thread;

	switch (op) {
	case SPDK_THREAD_OP_NEW:
		lw_thread = spdk_thread_get_ctx(thread);
		lw_thread->lcore = SPDK_ENV_LCORE_ID_ANY;
		return _reactor_schedule_thread(thread);
	case SPDK_THREAD_OP_RESCHED:
		_reactor_request_thread_reschedule(thread);
		return 0;
	default:
		return -ENOTSUP;
	}
}

static bool
reactor_thread_op_supported(enum spdk_thread_op op)
{
	switch (op) {
	case SPDK_THREAD_OP_NEW:
	case SPDK_THREAD_OP_RESCHED:
		return true;
	default:
		return false;
	}
}
struct call_reactor {
	uint32_t cur_core;
	spdk_event_fn fn;
	void *arg1;
	void *arg2;

	uint32_t orig_core;
	spdk_event_fn cpl;
};

static void
on_reactor(void *arg1, void *arg2)
{
	struct call_reactor *cr = arg1;
	struct spdk_event *evt;

	cr->fn(cr->arg1, cr->arg2);

	cr->cur_core = spdk_env_get_next_core(cr->cur_core);

	if (cr->cur_core >= g_reactor_count) {
		SPDK_DEBUGLOG(reactor, "Completed reactor iteration\n");

		evt = spdk_event_allocate(cr->orig_core, cr->cpl, cr->arg1, cr->arg2);
		free(cr);
	} else {
		SPDK_DEBUGLOG(reactor, "Continuing reactor iteration to %d\n",
			      cr->cur_core);

		evt = spdk_event_allocate(cr->cur_core, on_reactor, arg1, NULL);
	}
	assert(evt != NULL);
	spdk_event_call(evt);
}

void
spdk_for_each_reactor(spdk_event_fn fn, void *arg1, void *arg2, spdk_event_fn cpl)
{
	struct call_reactor *cr;

	/* When the application framework is shutting down, we will send one
	 * final for_each_reactor operation with completion callback _reactors_stop,
	 * to flush any existing for_each_reactor operations to avoid any memory
	 * leaks. We use a mutex here to protect a boolean flag that will ensure
	 * we don't start any more operations once we've started shutting down.
	 */
	pthread_mutex_lock(&g_stopping_reactors_mtx);
	if (g_stopping_reactors) {
		pthread_mutex_unlock(&g_stopping_reactors_mtx);
		return;
	} else if (cpl == _reactors_stop) {
		g_stopping_reactors = true;
	}
	pthread_mutex_unlock(&g_stopping_reactors_mtx);

	cr = calloc(1, sizeof(*cr));
	if (!cr) {
		SPDK_ERRLOG("Unable to perform reactor iteration\n");
		cpl(arg1, arg2);
		return;
	}

	cr->fn = fn;
	cr->arg1 = arg1;
	cr->arg2 = arg2;
	cr->cpl = cpl;
	cr->orig_core = spdk_env_get_current_core();
	cr->cur_core = spdk_env_get_first_core();

	SPDK_DEBUGLOG(reactor, "Starting reactor iteration from %d\n", cr->orig_core);

	_event_call(cr->cur_core, on_reactor, cr, NULL);
}
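/*
 * Illustrative usage sketch for spdk_for_each_reactor() (callback names are
 * hypothetical). fn runs once on each reactor in turn; cpl runs on the
 * originating core once the iteration has completed:
 *
 *	static void
 *	poke_reactor(void *arg1, void *arg2)
 *	{
 *		SPDK_NOTICELOG("running on core %u\n", spdk_env_get_current_core());
 *	}
 *
 *	static void
 *	poke_done(void *arg1, void *arg2)
 *	{
 *		SPDK_NOTICELOG("visited all reactors\n");
 *	}
 *
 *	spdk_for_each_reactor(poke_reactor, NULL, NULL, poke_done);
 */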
#ifdef __linux__
static int
reactor_schedule_thread_event(void *arg)
{
	struct spdk_reactor *reactor = arg;
	struct spdk_lw_thread *lw_thread, *tmp;
	uint32_t count = 0;
	uint64_t notify = 1;

	assert(reactor->in_interrupt);

	if (read(reactor->resched_fd, &notify, sizeof(notify)) < 0) {
		SPDK_ERRLOG("failed to acknowledge reschedule: %s.\n", spdk_strerror(errno));
		return -errno;
	}

	TAILQ_FOREACH_SAFE(lw_thread, &reactor->threads, link, tmp) {
		count += reactor_post_process_lw_thread(reactor, lw_thread) ? 1 : 0;
	}

	return count;
}

static int
reactor_interrupt_init(struct spdk_reactor *reactor)
{
	int rc;

	rc = spdk_fd_group_create(&reactor->fgrp);
	if (rc != 0) {
		return rc;
	}

	reactor->resched_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (reactor->resched_fd < 0) {
		rc = -EBADF;
		goto err;
	}

	rc = SPDK_FD_GROUP_ADD(reactor->fgrp, reactor->resched_fd, reactor_schedule_thread_event,
			       reactor);
	if (rc) {
		close(reactor->resched_fd);
		goto err;
	}

	reactor->events_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (reactor->events_fd < 0) {
		spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd);
		close(reactor->resched_fd);

		rc = -EBADF;
		goto err;
	}

	rc = SPDK_FD_GROUP_ADD(reactor->fgrp, reactor->events_fd,
			       event_queue_run_batch, reactor);
	if (rc) {
		spdk_fd_group_remove(reactor->fgrp, reactor->resched_fd);
		close(reactor->resched_fd);
		close(reactor->events_fd);
		goto err;
	}

	return 0;

err:
	spdk_fd_group_destroy(reactor->fgrp);
	reactor->fgrp = NULL;
	return rc;
}
#else
static int
reactor_interrupt_init(struct spdk_reactor *reactor)
{
	return -ENOTSUP;
}
#endif

static void
reactor_interrupt_fini(struct spdk_reactor *reactor)
{
	struct spdk_fd_group *fgrp = reactor->fgrp;

	if (!fgrp) {
		return;
	}

	spdk_fd_group_remove(fgrp, reactor->events_fd);
	spdk_fd_group_remove(fgrp, reactor->resched_fd);

	close(reactor->events_fd);
	close(reactor->resched_fd);

	spdk_fd_group_destroy(fgrp);
	reactor->fgrp = NULL;
}
static struct spdk_governor *
_governor_find(const char *name)
{
	struct spdk_governor *governor, *tmp;

	TAILQ_FOREACH_SAFE(governor, &g_governor_list, link, tmp) {
		if (strcmp(name, governor->name) == 0) {
			return governor;
		}
	}

	return NULL;
}

int
spdk_governor_set(const char *name)
{
	struct spdk_governor *governor;
	int rc = 0;

	/* NULL governor was specifically requested */
	if (name == NULL) {
		if (g_governor) {
			g_governor->deinit();
		}
		g_governor = NULL;
		return 0;
	}

	governor = _governor_find(name);
	if (governor == NULL) {
		return -EINVAL;
	}

	if (g_governor == governor) {
		return 0;
	}

	rc = governor->init();
	if (rc == 0) {
		if (g_governor) {
			g_governor->deinit();
		}
		g_governor = governor;
	}

	return rc;
}

struct spdk_governor *
spdk_governor_get(void)
{
	return g_governor;
}

void
spdk_governor_register(struct spdk_governor *governor)
{
	if (_governor_find(governor->name)) {
		SPDK_ERRLOG("governor named '%s' already registered.\n", governor->name);
		assert(false);
		return;
	}

	TAILQ_INSERT_TAIL(&g_governor_list, governor, link);
}

SPDK_LOG_REGISTER_COMPONENT(reactor)