nvmf: Add the Keep Alive feature

The controller shall treat a Keep Alive Timeout in the same manner
as connection loss. If the Keep Alive feature is in use and the
timer expires, then the controller shall:
1, stop processing commands and set the Controller Fatal Status
(CSTS,CFS) bit to '1';
2, terminate the NVMe Transport connection;
3, break the host to controller association;
A timer poller is added to each subsystem to monitor timeout event.

Change-Id: I001afab8a6764f30c39df37fa96384180d117486
Signed-off-by: JinYu <jin.yu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/439330
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
This commit is contained in:
JinYu 2019-01-07 18:47:29 +08:00 committed by Jim Harris
parent 058c4c890b
commit b8769cdb08
3 changed files with 191 additions and 8 deletions

View File

@ -47,7 +47,10 @@
#include "spdk_internal/log.h"
#define MIN_KEEP_ALIVE_TIMEOUT 10000
#define MIN_KEEP_ALIVE_TIMEOUT_IN_MS 10000
#define NVMF_DISC_KATO_IN_MS 120000
#define KAS_DEFAULT_VALUE 10
#define KAS_TIME_UNIT_IN_MS 100
#define MODEL_NUMBER "SPDK bdev Controller"
@ -72,6 +75,110 @@ spdk_nvmf_invalid_connect_response(struct spdk_nvmf_fabric_connect_rsp *rsp,
#define SPDK_NVMF_INVALID_CONNECT_DATA(rsp, field) \
spdk_nvmf_invalid_connect_response(rsp, 1, offsetof(struct spdk_nvmf_fabric_connect_data, field))
static void
spdk_nvmf_ctrlr_stop_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
{
if (!ctrlr) {
SPDK_ERRLOG("Controller is NULL\n");
return;
}
if (ctrlr->keep_alive_poller == NULL) {
return;
}
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Stop keep alive poller\n");
spdk_poller_unregister(&ctrlr->keep_alive_poller);
}
static void
spdk_nvmf_ctrlr_disconnect_qpairs_done(struct spdk_io_channel_iter *i, int status)
{
if (status == 0) {
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ctrlr disconnect qpairs complete successfully\n");
} else {
SPDK_ERRLOG("Fail to disconnect ctrlr qpairs\n");
}
}
static void
spdk_nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i)
{
int rc = 0;
struct spdk_nvmf_ctrlr *ctrlr;
struct spdk_nvmf_qpair *qpair, *temp_qpair;
struct spdk_io_channel *ch;
struct spdk_nvmf_poll_group *group;
ctrlr = spdk_io_channel_iter_get_ctx(i);
ch = spdk_io_channel_iter_get_channel(i);
group = spdk_io_channel_get_ctx(ch);
TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, temp_qpair) {
if (qpair->ctrlr == ctrlr) {
rc = spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
if (rc) {
SPDK_ERRLOG("Qpair disconnect failed\n");
goto next_channel;
}
}
}
next_channel:
spdk_for_each_channel_continue(i, rc);
}
static int
spdk_nvmf_ctrlr_keep_alive_poll(void *ctx)
{
uint64_t keep_alive_timeout_tick;
uint64_t now = spdk_get_ticks();
struct spdk_nvmf_ctrlr *ctrlr = ctx;
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Polling ctrlr keep alive timeout\n");
/* If the Keep alive feature is in use and the timer expires */
keep_alive_timeout_tick = ctrlr->last_keep_alive_tick +
ctrlr->feat.keep_alive_timer.bits.kato * spdk_get_ticks_hz() / UINT64_C(1000);
if (now > keep_alive_timeout_tick) {
/* set the Controller Fatal Status bit to '1' */
if (ctrlr->vcprop.csts.bits.cfs == 0) {
ctrlr->vcprop.csts.bits.cfs = 1;
/*
* disconnect qpairs, terminate Transport connection
* destroy ctrlr, break the host to controller association
* disconnect qpairs with qpair->ctrlr == ctrlr
*/
spdk_for_each_channel(ctrlr->subsys->tgt,
spdk_nvmf_ctrlr_disconnect_qpairs_on_pg,
ctrlr,
spdk_nvmf_ctrlr_disconnect_qpairs_done);
}
}
return 1;
}
static void
spdk_nvmf_ctrlr_start_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
{
if (!ctrlr) {
SPDK_ERRLOG("Controller is NULL\n");
return;
}
/* if cleared to 0 then the Keep Alive Timer is disabled */
if (ctrlr->feat.keep_alive_timer.bits.kato != 0) {
ctrlr->last_keep_alive_tick = spdk_get_ticks();
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Ctrlr add keep alive poller\n");
ctrlr->keep_alive_poller = spdk_poller_register(spdk_nvmf_ctrlr_keep_alive_poll, ctrlr,
ctrlr->feat.keep_alive_timer.bits.kato * 1000);
}
}
static void
ctrlr_add_qpair_and_update_rsp(struct spdk_nvmf_qpair *qpair,
struct spdk_nvmf_ctrlr *ctrlr,
@ -121,6 +228,7 @@ _spdk_nvmf_ctrlr_add_admin_qpair(void *ctx)
struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
ctrlr->admin_qpair = qpair;
spdk_nvmf_ctrlr_start_keep_alive_timer(ctrlr);
ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
spdk_nvmf_request_complete(req);
}
@ -172,10 +280,43 @@ spdk_nvmf_ctrlr_create(struct spdk_nvmf_subsystem *subsystem,
return NULL;
}
ctrlr->feat.keep_alive_timer.bits.kato = connect_cmd->kato;
/*
* Because Identify Controller data KAS default is 10
* KAS: this field indicates the granularity of the Keep Alive Timer in 100ms units
* keep-alive timeout in milliseconds
*/
ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(connect_cmd->kato,
KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
ctrlr->feat.async_event_configuration.bits.ns_attr_notice = 1;
ctrlr->feat.volatile_write_cache.bits.wce = 1;
if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
/* Don't accept keep-alive timeout for discovery controllers */
if (ctrlr->feat.keep_alive_timer.bits.kato != 0) {
SPDK_ERRLOG("Discovery controller don't accept keep-alive timeout\n");
spdk_bit_array_free(&ctrlr->qpair_mask);
free(ctrlr);
return NULL;
}
/*
* Discovery controllers use some arbitrary high value in order
* to cleanup stale discovery sessions
*
* From the 1.0a nvme-of spec:
* "The Keep Alive command is reserved for
* Discovery controllers. A transport may specify a
* fixed Discovery controller activity timeout value
* (e.g., 2 minutes). If no commands are received
* by a Discovery controller within that time
* period, the controller may perform the
* actions for Keep Alive Timer expiration".
* kato is in millisecond.
*/
ctrlr->feat.keep_alive_timer.bits.kato = NVMF_DISC_KATO_IN_MS;
}
/* Subtract 1 for admin queue, 1 for 0's based */
ctrlr->feat.number_of_queues.bits.ncqr = transport->opts.max_qpairs_per_ctrlr - 1 -
1;
@ -217,12 +358,21 @@ spdk_nvmf_ctrlr_create(struct spdk_nvmf_subsystem *subsystem,
return ctrlr;
}
static void
_spdk_nvmf_ctrlr_destruct(void *ctx)
{
struct spdk_nvmf_ctrlr *ctrlr = ctx;
spdk_nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
free(ctrlr);
}
void
spdk_nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr)
{
spdk_nvmf_subsystem_remove_ctrlr(ctrlr->subsys, ctrlr);
free(ctrlr);
spdk_thread_send_msg(ctrlr->thread, _spdk_nvmf_ctrlr_destruct, ctrlr);
}
static void
@ -265,7 +415,6 @@ spdk_nvmf_ctrlr_add_io_qpair(void *ctx)
}
ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
end:
spdk_thread_send_msg(qpair->group->thread, _spdk_nvmf_request_complete, req);
}
@ -877,12 +1026,31 @@ spdk_nvmf_ctrlr_set_features_keep_alive_timer(struct spdk_nvmf_request *req)
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer (%u ms)\n", cmd->cdw11);
/*
* if attempts to disable keep alive by setting kato to 0h
* a status value of keep alive invalid shall be returned
*/
if (cmd->cdw11 == 0) {
rsp->status.sc = SPDK_NVME_SC_KEEP_ALIVE_INVALID;
} else if (cmd->cdw11 < MIN_KEEP_ALIVE_TIMEOUT) {
ctrlr->feat.keep_alive_timer.bits.kato = MIN_KEEP_ALIVE_TIMEOUT;
} else if (cmd->cdw11 < MIN_KEEP_ALIVE_TIMEOUT_IN_MS) {
ctrlr->feat.keep_alive_timer.bits.kato = MIN_KEEP_ALIVE_TIMEOUT_IN_MS;
} else {
ctrlr->feat.keep_alive_timer.bits.kato = cmd->cdw11;
/* round up to milliseconds */
ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(cmd->cdw11,
KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
}
/*
* if change the keep alive timeout value successfully
* update the keep alive poller.
*/
if (cmd->cdw11 != 0) {
if (ctrlr->keep_alive_poller != NULL) {
spdk_poller_unregister(&ctrlr->keep_alive_poller);
}
ctrlr->keep_alive_poller = spdk_poller_register(spdk_nvmf_ctrlr_keep_alive_poll, ctrlr,
ctrlr->feat.keep_alive_timer.bits.kato * 1000);
}
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer set to %u ms\n",
@ -1225,7 +1393,7 @@ spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_c
if (subsystem->subtype == SPDK_NVMF_SUBTYPE_NVME) {
spdk_strcpy_pad(cdata->mn, MODEL_NUMBER, sizeof(cdata->mn), ' ');
spdk_strcpy_pad(cdata->sn, spdk_nvmf_subsystem_get_sn(subsystem), sizeof(cdata->sn), ' ');
cdata->kas = 10;
cdata->kas = KAS_DEFAULT_VALUE;
cdata->rab = 6;
cdata->cmic.multi_port = 1;
@ -1592,6 +1760,8 @@ spdk_nvmf_ctrlr_set_features(struct spdk_nvmf_request *req)
static int
spdk_nvmf_ctrlr_keep_alive(struct spdk_nvmf_request *req)
{
struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Keep Alive\n");
/*
* To handle keep alive just clear or reset the
@ -1601,6 +1771,8 @@ spdk_nvmf_ctrlr_keep_alive(struct spdk_nvmf_request *req)
* keep alive has exceeded the max duration and
* take appropriate action.
*/
ctrlr->last_keep_alive_tick = spdk_get_ticks();
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}

View File

@ -236,6 +236,10 @@ struct spdk_nvmf_ctrlr {
uint16_t changed_ns_list_count;
struct spdk_nvme_ns_list changed_ns_list;
/* Time to trigger keep-alive--poller_time = now_tick + period */
uint64_t last_keep_alive_tick;
struct spdk_poller *keep_alive_poller;
TAILQ_ENTRY(spdk_nvmf_ctrlr) link;
};

View File

@ -120,6 +120,12 @@ DEFINE_STUB(spdk_nvmf_transport_qpair_set_sqsize,
(struct spdk_nvmf_qpair *qpair),
0);
int
spdk_nvmf_qpair_disconnect(struct spdk_nvmf_qpair *qpair, nvmf_qpair_disconnect_cb cb_fn, void *ctx)
{
return 0;
}
void
spdk_nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata)
{
@ -315,6 +321,7 @@ test_connect(void)
CU_ASSERT(rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS);
CU_ASSERT(nvme_status_success(&rsp.nvme_cpl.status));
CU_ASSERT(qpair.ctrlr != NULL);
spdk_nvmf_ctrlr_stop_keep_alive_timer(qpair.ctrlr);
spdk_bit_array_free(&qpair.ctrlr->qpair_mask);
free(qpair.ctrlr);
qpair.ctrlr = NULL;