bdev/nvme: I/O error resiliency can be configured by global options
Add three options for I/O error resiliency to spdk_bdev_nvme_opts. The RPC bdev_nvme_set_options can then configure them. They can be overridden if they are given by the RPC bdev_nvme_attach_controller. Change-Id: If3ee23aeef8b7585fe0fb5ec4695df5866fc1e74 Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11830 Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
6fb6716d45
commit
0fba8dc8cb
@ -31,6 +31,10 @@ A new flag `ACCEL_FLAG_PERSISTENT` was added to indicate the target memory is PM
|
|||||||
Added `bdev_nvme_add_error_injection` and `bdev_nvme_remove_error_injection` RPCs to add and
|
Added `bdev_nvme_add_error_injection` and `bdev_nvme_remove_error_injection` RPCs to add and
|
||||||
remove NVMe error injections.
|
remove NVMe error injections.
|
||||||
|
|
||||||
|
New parameters, `ctrlr_loss_timeout_sec`, `reconnect_delay_sec`, and `fast_io_fail_timeout_sec`, are
|
||||||
|
added to the RPC `bdev_nvme_set_options`. They can be overridden if they are given by the RPC
|
||||||
|
`bdev_nvme_attach_controller`.
|
||||||
|
|
||||||
### event
|
### event
|
||||||
|
|
||||||
Added `msg_mempool_size` parameter to `spdk_reactors_init` and `spdk_thread_lib_init_ext`.
|
Added `msg_mempool_size` parameter to `spdk_reactors_init` and `spdk_thread_lib_init_ext`.
|
||||||
|
@ -2888,6 +2888,9 @@ Example response:
|
|||||||
Set global parameters for all bdev NVMe. This RPC may only be called before SPDK subsystems have been initialized
|
Set global parameters for all bdev NVMe. This RPC may only be called before SPDK subsystems have been initialized
|
||||||
or any bdev NVMe has been created.
|
or any bdev NVMe has been created.
|
||||||
|
|
||||||
|
The parameters ctrlr_loss_timeout_sec, reconnect_delay_sec, and fast_io_fail_timeout_sec are for I/O error resiliency.
|
||||||
|
They can be overridden if they are given by the RPC bdev_nvme_attach_controller.
|
||||||
|
|
||||||
#### Parameters
|
#### Parameters
|
||||||
|
|
||||||
Name | Optional | Type | Description
|
Name | Optional | Type | Description
|
||||||
@ -2908,6 +2911,9 @@ delay_cmd_submit | Optional | boolean | Enable delaying NVMe comma
|
|||||||
transport_retry_count | Optional | number | The number of attempts per I/O in the transport layer before an I/O fails.
|
transport_retry_count | Optional | number | The number of attempts per I/O in the transport layer before an I/O fails.
|
||||||
bdev_retry_count | Optional | number | The number of attempts per I/O in the bdev layer before an I/O fails. -1 means infinite retries.
|
bdev_retry_count | Optional | number | The number of attempts per I/O in the bdev layer before an I/O fails. -1 means infinite retries.
|
||||||
transport_ack_timeout | Optional | number | Time to wait ack until packet retransmission. RDMA specific. Range 0-31 where 0 is driver-specific default value.
|
transport_ack_timeout | Optional | number | Time to wait ack until packet retransmission. RDMA specific. Range 0-31 where 0 is driver-specific default value.
|
||||||
|
ctrlr_loss_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before deleting ctrlr. -1 means infinite reconnects. 0 means no reconnect.
|
||||||
|
reconnect_delay_sec | Optional | number | Time to delay a reconnect retry. 0 means no reconnect.
|
||||||
|
fast_io_fail_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before failing I/O to ctrlr. 0 means no such timeout.
|
||||||
|
|
||||||
#### Example
|
#### Example
|
||||||
|
|
||||||
|
@ -139,6 +139,9 @@ static struct spdk_bdev_nvme_opts g_opts = {
|
|||||||
.delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
|
.delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
|
||||||
.bdev_retry_count = 3,
|
.bdev_retry_count = 3,
|
||||||
.transport_ack_timeout = 0,
|
.transport_ack_timeout = 0,
|
||||||
|
.ctrlr_loss_timeout_sec = 0,
|
||||||
|
.reconnect_delay_sec = 0,
|
||||||
|
.fast_io_fail_timeout_sec = 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL
|
#define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL
|
||||||
@ -3474,6 +3477,15 @@ err:
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts)
|
||||||
|
{
|
||||||
|
opts->prchk_flags = 0;
|
||||||
|
opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
|
||||||
|
opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
|
||||||
|
opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
|
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
|
||||||
struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
|
struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
|
||||||
@ -3587,6 +3599,10 @@ bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
|
|||||||
*opts = g_opts;
|
*opts = g_opts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool bdev_nvme_check_multipath_params(int32_t ctrlr_loss_timeout_sec,
|
||||||
|
uint32_t reconnect_delay_sec,
|
||||||
|
uint32_t fast_io_fail_timeout_sec);
|
||||||
|
|
||||||
static int
|
static int
|
||||||
bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
|
bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
|
||||||
{
|
{
|
||||||
@ -3601,6 +3617,12 @@ bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!bdev_nvme_check_multipath_params(opts->ctrlr_loss_timeout_sec,
|
||||||
|
opts->reconnect_delay_sec,
|
||||||
|
opts->fast_io_fail_timeout_sec)) {
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3979,6 +4001,8 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid,
|
|||||||
|
|
||||||
if (bdev_opts) {
|
if (bdev_opts) {
|
||||||
memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
|
memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
|
||||||
|
} else {
|
||||||
|
bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
|
if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
|
||||||
@ -5678,6 +5702,9 @@ bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
|
|||||||
spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
|
spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
|
||||||
spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
|
spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
|
||||||
spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
|
spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
|
||||||
|
spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
|
||||||
|
spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
|
||||||
|
spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
|
||||||
spdk_json_write_object_end(w);
|
spdk_json_write_object_end(w);
|
||||||
|
|
||||||
spdk_json_write_object_end(w);
|
spdk_json_write_object_end(w);
|
||||||
|
@ -251,6 +251,9 @@ struct spdk_bdev_nvme_opts {
|
|||||||
/* The number of attempts per I/O in the bdev layer before an I/O fails. */
|
/* The number of attempts per I/O in the bdev layer before an I/O fails. */
|
||||||
int32_t bdev_retry_count;
|
int32_t bdev_retry_count;
|
||||||
uint8_t transport_ack_timeout;
|
uint8_t transport_ack_timeout;
|
||||||
|
int32_t ctrlr_loss_timeout_sec;
|
||||||
|
uint32_t reconnect_delay_sec;
|
||||||
|
uint32_t fast_io_fail_timeout_sec;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch);
|
struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch);
|
||||||
@ -258,6 +261,8 @@ void bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts);
|
|||||||
int bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts);
|
int bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts);
|
||||||
int bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx);
|
int bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx);
|
||||||
|
|
||||||
|
void bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts);
|
||||||
|
|
||||||
int bdev_nvme_create(struct spdk_nvme_transport_id *trid,
|
int bdev_nvme_create(struct spdk_nvme_transport_id *trid,
|
||||||
const char *base_name,
|
const char *base_name,
|
||||||
const char **names,
|
const char **names,
|
||||||
|
@ -92,6 +92,9 @@ static const struct spdk_json_object_decoder rpc_bdev_nvme_options_decoders[] =
|
|||||||
{"transport_retry_count", offsetof(struct spdk_bdev_nvme_opts, transport_retry_count), spdk_json_decode_uint32, true},
|
{"transport_retry_count", offsetof(struct spdk_bdev_nvme_opts, transport_retry_count), spdk_json_decode_uint32, true},
|
||||||
{"bdev_retry_count", offsetof(struct spdk_bdev_nvme_opts, bdev_retry_count), spdk_json_decode_int32, true},
|
{"bdev_retry_count", offsetof(struct spdk_bdev_nvme_opts, bdev_retry_count), spdk_json_decode_int32, true},
|
||||||
{"transport_ack_timeout", offsetof(struct spdk_bdev_nvme_opts, transport_ack_timeout), spdk_json_decode_uint8, true},
|
{"transport_ack_timeout", offsetof(struct spdk_bdev_nvme_opts, transport_ack_timeout), spdk_json_decode_uint8, true},
|
||||||
|
{"ctrlr_loss_timeout_sec", offsetof(struct spdk_bdev_nvme_opts, ctrlr_loss_timeout_sec), spdk_json_decode_int32, true},
|
||||||
|
{"reconnect_delay_sec", offsetof(struct spdk_bdev_nvme_opts, reconnect_delay_sec), spdk_json_decode_uint32, true},
|
||||||
|
{"fast_io_fail_timeout_sec", offsetof(struct spdk_bdev_nvme_opts, fast_io_fail_timeout_sec), spdk_json_decode_uint32, true},
|
||||||
};
|
};
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -324,6 +327,7 @@ rpc_bdev_nvme_attach_controller(struct spdk_jsonrpc_request *request,
|
|||||||
}
|
}
|
||||||
|
|
||||||
spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->req.drv_opts, sizeof(ctx->req.drv_opts));
|
spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->req.drv_opts, sizeof(ctx->req.drv_opts));
|
||||||
|
bdev_nvme_get_default_ctrlr_opts(&ctx->req.bdev_opts);
|
||||||
|
|
||||||
if (spdk_json_decode_object(params, rpc_bdev_nvme_attach_controller_decoders,
|
if (spdk_json_decode_object(params, rpc_bdev_nvme_attach_controller_decoders,
|
||||||
SPDK_COUNTOF(rpc_bdev_nvme_attach_controller_decoders),
|
SPDK_COUNTOF(rpc_bdev_nvme_attach_controller_decoders),
|
||||||
|
@ -480,7 +480,10 @@ if __name__ == "__main__":
|
|||||||
delay_cmd_submit=args.delay_cmd_submit,
|
delay_cmd_submit=args.delay_cmd_submit,
|
||||||
transport_retry_count=args.transport_retry_count,
|
transport_retry_count=args.transport_retry_count,
|
||||||
bdev_retry_count=args.bdev_retry_count,
|
bdev_retry_count=args.bdev_retry_count,
|
||||||
transport_ack_timeout=args.transport_ack_timeout)
|
transport_ack_timeout=args.transport_ack_timeout,
|
||||||
|
ctrlr_loss_timeout_sec=args.ctrlr_loss_timeout_sec,
|
||||||
|
reconnect_delay_sec=args.reconnect_delay_sec,
|
||||||
|
fast_io_fail_timeout_sec=args.fast_io_fail_timeout_sec)
|
||||||
|
|
||||||
p = subparsers.add_parser('bdev_nvme_set_options', aliases=['set_bdev_nvme_options'],
|
p = subparsers.add_parser('bdev_nvme_set_options', aliases=['set_bdev_nvme_options'],
|
||||||
help='Set options for the bdev nvme type. This is startup command.')
|
help='Set options for the bdev nvme type. This is startup command.')
|
||||||
@ -518,6 +521,29 @@ if __name__ == "__main__":
|
|||||||
p.add_argument('-e', '--transport-ack-timeout',
|
p.add_argument('-e', '--transport-ack-timeout',
|
||||||
help="""Time to wait ack until packet retransmission. RDMA specific.
|
help="""Time to wait ack until packet retransmission. RDMA specific.
|
||||||
Range 0-31 where 0 is driver-specific default value.""", type=int)
|
Range 0-31 where 0 is driver-specific default value.""", type=int)
|
||||||
|
p.add_argument('-l', '--ctrlr-loss-timeout-sec',
|
||||||
|
help="""Time to wait until ctrlr is reconnected before deleting ctrlr.
|
||||||
|
-1 means infinite reconnect retries. 0 means no reconnect retry.
|
||||||
|
If reconnect_delay_sec is zero, ctrlr_loss_timeout_sec has to be zero.
|
||||||
|
If reconnect_delay_sec is non-zero, ctrlr_loss_timeout_sec has to be -1 or not less than
|
||||||
|
reconnect_delay_sec.
|
||||||
|
This can be overridden by bdev_nvme_attach_controller.""",
|
||||||
|
type=int)
|
||||||
|
p.add_argument('-o', '--reconnect-delay-sec',
|
||||||
|
help="""Time to delay a reconnect retry.
|
||||||
|
If ctrlr_loss_timeout_sec is zero, reconnect_delay_sec has to be zero.
|
||||||
|
If ctrlr_loss_timeout_sec is -1, reconnect_delay_sec has to be non-zero.
|
||||||
|
If ctrlr_loss_timeout_sec is not -1 or zero, reconnect_delay_sec has to be non-zero and
|
||||||
|
less than ctrlr_loss_timeout_sec.
|
||||||
|
This can be overridden by bdev_nvme_attach_controller.""",
|
||||||
|
type=int)
|
||||||
|
p.add_argument('-u', '--fast-io-fail-timeout-sec',
|
||||||
|
help="""Time to wait until ctrlr is reconnected before failing I/O to ctrlr.
|
||||||
|
0 means no such timeout.
|
||||||
|
If fast_io_fail_timeout_sec is not zero, it has to be not less than reconnect_delay_sec and
|
||||||
|
less than ctrlr_loss_timeout_sec if ctrlr_loss_timeout_sec is not -1.
|
||||||
|
This can be overridden by bdev_nvme_attach_controller.""",
|
||||||
|
type=int)
|
||||||
|
|
||||||
p.set_defaults(func=bdev_nvme_set_options)
|
p.set_defaults(func=bdev_nvme_set_options)
|
||||||
|
|
||||||
|
@ -443,7 +443,8 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo
|
|||||||
low_priority_weight=None, medium_priority_weight=None, high_priority_weight=None,
|
low_priority_weight=None, medium_priority_weight=None, high_priority_weight=None,
|
||||||
nvme_adminq_poll_period_us=None, nvme_ioq_poll_period_us=None, io_queue_requests=None,
|
nvme_adminq_poll_period_us=None, nvme_ioq_poll_period_us=None, io_queue_requests=None,
|
||||||
delay_cmd_submit=None, transport_retry_count=None, bdev_retry_count=None,
|
delay_cmd_submit=None, transport_retry_count=None, bdev_retry_count=None,
|
||||||
transport_ack_timeout=None):
|
transport_ack_timeout=None, ctrlr_loss_timeout_sec=None, reconnect_delay_sec=None,
|
||||||
|
fast_io_fail_timeout_sec=None):
|
||||||
"""Set options for the bdev nvme. This is startup command.
|
"""Set options for the bdev nvme. This is startup command.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -464,6 +465,22 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo
|
|||||||
bdev_retry_count: The number of attempts per I/O in the bdev layer when an I/O fails. -1 means infinite retries. (optional)
|
bdev_retry_count: The number of attempts per I/O in the bdev layer when an I/O fails. -1 means infinite retries. (optional)
|
||||||
transport_ack_timeout: Time to wait ack until packet retransmission. RDMA specific.
|
transport_ack_timeout: Time to wait ack until packet retransmission. RDMA specific.
|
||||||
Range 0-31 where 0 is driver-specific default value (optional)
|
Range 0-31 where 0 is driver-specific default value (optional)
|
||||||
|
ctrlr_loss_timeout_sec: Time to wait until ctrlr is reconnected before deleting ctrlr.
|
||||||
|
-1 means infinite reconnect retries. 0 means no reconnect retry.
|
||||||
|
If reconnect_delay_sec is zero, ctrlr_loss_timeout_sec has to be zero.
|
||||||
|
If reconnect_delay_sec is non-zero, ctrlr_loss_timeout_sec has to be -1 or not less than reconnect_delay_sec.
|
||||||
|
This can be overridden by bdev_nvme_attach_controller. (optional)
|
||||||
|
reconnect_delay_sec: Time to delay a reconnect retry.
|
||||||
|
If ctrlr_loss_timeout_sec is zero, reconnect_delay_sec has to be zero.
|
||||||
|
If ctrlr_loss_timeout_sec is -1, reconnect_delay_sec has to be non-zero.
|
||||||
|
If ctrlr_loss_timeout_sec is not -1 or zero, reconnect_delay_sec has to be non-zero and less than ctrlr_loss_timeout_sec.
|
||||||
|
This can be overridden by bdev_nvme_attach_controller. (optional)
|
||||||
|
fast_io_fail_timeout_sec: Time to wait until ctrlr is reconnected before failing I/O to ctrlr.
|
||||||
|
0 means no such timeout.
|
||||||
|
If fast_io_fail_timeout_sec is not zero, it has to be not less than reconnect_delay_sec and less than
|
||||||
|
ctrlr_loss_timeout_sec if ctrlr_loss_timeout_sec is not -1.
|
||||||
|
This can be overridden by bdev_nvme_attach_controller. (optional)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
params = {}
|
params = {}
|
||||||
|
|
||||||
@ -516,6 +533,15 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo
|
|||||||
if transport_ack_timeout is not None:
|
if transport_ack_timeout is not None:
|
||||||
params['transport_ack_timeout'] = transport_ack_timeout
|
params['transport_ack_timeout'] = transport_ack_timeout
|
||||||
|
|
||||||
|
if ctrlr_loss_timeout_sec is not None:
|
||||||
|
params['ctrlr_loss_timeout_sec'] = ctrlr_loss_timeout_sec
|
||||||
|
|
||||||
|
if reconnect_delay_sec is not None:
|
||||||
|
params['reconnect_delay_sec'] = reconnect_delay_sec
|
||||||
|
|
||||||
|
if fast_io_fail_timeout_sec is not None:
|
||||||
|
params['fast_io_fail_timeout_sec'] = fast_io_fail_timeout_sec
|
||||||
|
|
||||||
return client.call('bdev_nvme_set_options', params)
|
return client.call('bdev_nvme_set_options', params)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user