bdev/nvme: detect Controller Fatal Status when timeout happens
If the controller encounters a serious error and sets the Controller Fatal Status (CFS) field to 1, the host driver is not notified of the error. When a timeout happens, check CFS and, if it is set, reset the controller to recover from the fatal status. Change-Id: I9fa5b263b34edc52d0f359d874b2920f7570d1f3 Signed-off-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-on: https://review.gerrithub.io/417622 Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
23ef5c447b
commit
6fac5e5b23
@ -669,7 +669,10 @@ struct spdk_nvme_qpair;
|
||||
* request.
|
||||
*
|
||||
* For timeouts detected on the admin queue pair, the qpair returned here will
|
||||
* be NULL.
|
||||
* be NULL. If the controller has a serious error condition and is unable to
|
||||
* communicate with the driver via the completion queue, the controller can set the Controller
|
||||
* Fatal Status field to 1; a reset is then required to recover from such an error.
|
||||
* Users may detect Controller Fatal Status when a timeout happens.
|
||||
*
|
||||
* \param cb_arg Argument passed to callback function.
|
||||
* \param ctrlr Opaque handle to NVMe controller.
|
||||
|
@ -851,9 +851,20 @@ timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
|
||||
struct spdk_nvme_qpair *qpair, uint16_t cid)
|
||||
{
|
||||
int rc;
|
||||
union spdk_nvme_csts_register csts;
|
||||
|
||||
SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid);
|
||||
|
||||
csts = spdk_nvme_ctrlr_get_regs_csts(ctrlr);
|
||||
if (csts.bits.cfs) {
|
||||
SPDK_ERRLOG("Controller Fatal Status, reset required\n");
|
||||
rc = spdk_nvme_ctrlr_reset(ctrlr);
|
||||
if (rc) {
|
||||
SPDK_ERRLOG("Resetting controller failed.\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
switch (g_opts.action_on_timeout) {
|
||||
case SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT:
|
||||
if (qpair) {
|
||||
|
Loading…
Reference in New Issue
Block a user