bdev/nvme: Allow the user to control the I/O timeout behavior
The user can now not only specify an optional timeout for commands, but also the action to take when a timeout is detected. Change-Id: I7d7cdd846d580e0b3a5f733d398ee9b19d6fe034 Signed-off-by: Ben Walker <benjamin.walker@intel.com>
This commit is contained in:
parent
193f4f8392
commit
acd0b4573d
@ -100,10 +100,13 @@
|
|||||||
# The number of attempts per I/O when an I/O fails. Do not include
|
# The number of attempts per I/O when an I/O fails. Do not include
|
||||||
# this key to get the default behavior.
|
# this key to get the default behavior.
|
||||||
NvmeRetryCount 4
|
NvmeRetryCount 4
|
||||||
# Registers the application to receive timeout callback and to reset the controller.
|
# Timeout for each command, in seconds. If 0, don't track timeouts.
|
||||||
ResetControllerOnTimeout Yes
|
NvmeTimeoutValue 0
|
||||||
# Timeout value.
|
# Action to take on command timeout. Only valid when NvmeTimeoutValue is greater
|
||||||
NvmeTimeoutValue 30
|
# than 0. This may be 'Reset' to reset the controller, 'Abort' to abort
|
||||||
|
# the command, or 'None' to just print a message but do nothing.
|
||||||
|
# Admin command timeouts will always result in a reset.
|
||||||
|
ActionOnTimeout None
|
||||||
# Set how often the admin queue is polled for asynchronous events.
|
# Set how often the admin queue is polled for asynchronous events.
|
||||||
# Units in microseconds.
|
# Units in microseconds.
|
||||||
AdminPollRate 100000
|
AdminPollRate 100000
|
||||||
|
@ -91,10 +91,13 @@
|
|||||||
# The number of attempts per I/O when an I/O fails. Do not include
|
# The number of attempts per I/O when an I/O fails. Do not include
|
||||||
# this key to get the default behavior.
|
# this key to get the default behavior.
|
||||||
NvmeRetryCount 4
|
NvmeRetryCount 4
|
||||||
# Registers the application to receive timeout callback and to reset the controller.
|
# Timeout for each command, in seconds. If 0, don't track timeouts.
|
||||||
ResetControllerOnTimeout Yes
|
NvmeTimeoutValue 0
|
||||||
# Timeout value.
|
# Action to take on command timeout. Only valid when NvmeTimeoutValue is greater
|
||||||
NvmeTimeoutValue 30
|
# than 0. This may be 'Reset' to reset the controller, 'Abort' to abort
|
||||||
|
# the command, or 'None' to just print a message but do nothing.
|
||||||
|
# Admin command timeouts will always result in a reset.
|
||||||
|
ActionOnTimeout None
|
||||||
# Set how often the admin queue is polled for asynchronous events.
|
# Set how often the admin queue is polled for asynchronous events.
|
||||||
# Units in microseconds.
|
# Units in microseconds.
|
||||||
AdminPollRate 100000
|
AdminPollRate 100000
|
||||||
|
@ -79,13 +79,13 @@
|
|||||||
# The number of attempts per I/O when an I/O fails. Do not include
|
# The number of attempts per I/O when an I/O fails. Do not include
|
||||||
# this key to get the default behavior.
|
# this key to get the default behavior.
|
||||||
NvmeRetryCount 4
|
NvmeRetryCount 4
|
||||||
# The maximum number of NVMe controllers to claim. Do not include this key to
|
# Timeout for each command, in seconds. If 0, don't track timeouts.
|
||||||
# claim all of them.
|
NvmeTimeoutValue 0
|
||||||
NumControllers 2
|
# Action to take on command timeout. Only valid when NvmeTimeoutValue is greater
|
||||||
# Registers the application to receive timeout callback and to reset the controller.
|
# than 0. This may be 'Reset' to reset the controller, 'Abort' to abort
|
||||||
ResetControllerOnTimeout Yes
|
# the command, or 'None' to just print a message but do nothing.
|
||||||
# Timeout value.
|
# Admin command timeouts will always result in a reset.
|
||||||
NvmeTimeoutValue 30
|
ActionOnTimeout None
|
||||||
# Set how often the admin queue is polled for asynchronous events.
|
# Set how often the admin queue is polled for asynchronous events.
|
||||||
# Units in microseconds.
|
# Units in microseconds.
|
||||||
AdminPollRate 100000
|
AdminPollRate 100000
|
||||||
|
@ -111,8 +111,14 @@ struct nvme_probe_ctx {
|
|||||||
const char *names[NVME_MAX_CONTROLLERS];
|
const char *names[NVME_MAX_CONTROLLERS];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum timeout_action {
|
||||||
|
TIMEOUT_ACTION_NONE = 0,
|
||||||
|
TIMEOUT_ACTION_RESET,
|
||||||
|
TIMEOUT_ACTION_ABORT,
|
||||||
|
};
|
||||||
|
|
||||||
static int g_hot_insert_nvme_controller_index = 0;
|
static int g_hot_insert_nvme_controller_index = 0;
|
||||||
static bool g_reset_controller_on_timeout = false;
|
static enum timeout_action g_action_on_timeout = TIMEOUT_ACTION_NONE;
|
||||||
static int g_timeout = 0;
|
static int g_timeout = 0;
|
||||||
static int g_nvme_adminq_poll_timeout_us = 0;
|
static int g_nvme_adminq_poll_timeout_us = 0;
|
||||||
static bool g_nvme_hotplug_enabled;
|
static bool g_nvme_hotplug_enabled;
|
||||||
@ -559,6 +565,21 @@ probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
spdk_nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl)
|
||||||
|
{
|
||||||
|
struct spdk_nvme_ctrlr *ctrlr = ctx;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
if (spdk_nvme_cpl_is_error(cpl)) {
|
||||||
|
SPDK_WARNLOG("Abort failed. Resetting controller.\n");
|
||||||
|
rc = spdk_nvme_ctrlr_reset(ctrlr);
|
||||||
|
if (rc) {
|
||||||
|
SPDK_ERRLOG("Resetting controller failed.\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
|
timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
|
||||||
struct spdk_nvme_qpair *qpair, uint16_t cid)
|
struct spdk_nvme_qpair *qpair, uint16_t cid)
|
||||||
@ -567,9 +588,27 @@ timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
|
|||||||
|
|
||||||
SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);
|
SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);
|
||||||
|
|
||||||
rc = spdk_nvme_ctrlr_reset(ctrlr);
|
switch (g_action_on_timeout) {
|
||||||
if (rc) {
|
case TIMEOUT_ACTION_ABORT:
|
||||||
SPDK_ERRLOG("resetting controller failed\n");
|
if (qpair) {
|
||||||
|
rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid,
|
||||||
|
spdk_nvme_abort_cpl, ctrlr);
|
||||||
|
if (rc == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
SPDK_ERRLOG("Unable to send abort. Resetting.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fallthrough */
|
||||||
|
case TIMEOUT_ACTION_RESET:
|
||||||
|
rc = spdk_nvme_ctrlr_reset(ctrlr);
|
||||||
|
if (rc) {
|
||||||
|
SPDK_ERRLOG("Resetting controller failed.\n");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case TIMEOUT_ACTION_NONE:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -621,7 +660,7 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
|
|||||||
sizeof(struct nvme_io_channel));
|
sizeof(struct nvme_io_channel));
|
||||||
TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, nvme_ctrlr, tailq);
|
TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, nvme_ctrlr, tailq);
|
||||||
|
|
||||||
if (g_reset_controller_on_timeout) {
|
if (g_action_on_timeout != TIMEOUT_ACTION_NONE) {
|
||||||
spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_timeout,
|
spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_timeout,
|
||||||
timeout_cb, NULL);
|
timeout_cb, NULL);
|
||||||
}
|
}
|
||||||
@ -763,13 +802,32 @@ bdev_nvme_library_init(void)
|
|||||||
probe_ctx.count++;
|
probe_ctx.count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
g_reset_controller_on_timeout =
|
|
||||||
spdk_conf_section_get_boolval(sp, "ResetControllerOnTimeout", false);
|
|
||||||
|
|
||||||
if ((g_timeout = spdk_conf_section_get_intval(sp, "NvmeTimeoutValue")) < 0) {
|
if ((g_timeout = spdk_conf_section_get_intval(sp, "NvmeTimeoutValue")) < 0) {
|
||||||
g_timeout = 0;
|
g_timeout = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (g_timeout > 0) {
|
||||||
|
val = spdk_conf_section_get_val(sp, "ActionOnTimeout");
|
||||||
|
if (val != NULL) {
|
||||||
|
if (!strcasecmp(val, "Reset")) {
|
||||||
|
g_action_on_timeout = TIMEOUT_ACTION_RESET;
|
||||||
|
} else if (!strcasecmp(val, "Abort")) {
|
||||||
|
g_action_on_timeout = TIMEOUT_ACTION_ABORT;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* Handle old name for backward compatibility */
|
||||||
|
val = spdk_conf_section_get_val(sp, "ResetControllerOnTimeout");
|
||||||
|
if (val) {
|
||||||
|
SPDK_WARNLOG("ResetControllerOnTimeout was renamed to ActionOnTimeout\n");
|
||||||
|
SPDK_WARNLOG("Please update your configuration file\n");
|
||||||
|
|
||||||
|
if (spdk_conf_section_get_boolval(sp, "ResetControllerOnTimeout", false)) {
|
||||||
|
g_action_on_timeout = TIMEOUT_ACTION_RESET;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
g_nvme_adminq_poll_timeout_us = spdk_conf_section_get_intval(sp, "AdminPollRate");
|
g_nvme_adminq_poll_timeout_us = spdk_conf_section_get_intval(sp, "AdminPollRate");
|
||||||
if (g_nvme_adminq_poll_timeout_us <= 0) {
|
if (g_nvme_adminq_poll_timeout_us <= 0) {
|
||||||
g_nvme_adminq_poll_timeout_us = 1000000;
|
g_nvme_adminq_poll_timeout_us = 1000000;
|
||||||
|
Loading…
Reference in New Issue
Block a user