bdev/nvme: I/O error resiliency can be configured by global options
Add three options for I/O error resiliency to spdk_bdev_nvme_opts so that the RPC bdev_nvme_set_options can configure them. They can be overridden if they are given by the RPC bdev_nvme_attach_controller. Change-Id: If3ee23aeef8b7585fe0fb5ec4695df5866fc1e74 Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11830 Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
6fb6716d45
commit
0fba8dc8cb
@ -31,6 +31,10 @@ A new flag `ACCEL_FLAG_PERSISTENT` was added to indicate the target memory is PM
|
||||
Added `bdev_nvme_add_error_injection` and `bdev_nvme_remove_error_injection` RPCs to add and
|
||||
remove NVMe error injections.
|
||||
|
||||
New parameters, `ctrlr_loss_timeout_sec`, `reconnect_delay_sec`, and `fast_io_fail_timeout_sec`, are
|
||||
added to the RPC `bdev_nvme_set_options`. They can be overridden if they are given by the RPC
|
||||
`bdev_nvme_attach_controller`.
|
||||
|
||||
### event
|
||||
|
||||
Added `msg_mempool_size` parameter to `spdk_reactors_init` and `spdk_thread_lib_init_ext`.
|
||||
|
@ -2888,6 +2888,9 @@ Example response:
|
||||
Set global parameters for all bdev NVMe. This RPC may only be called before SPDK subsystems have been initialized
|
||||
or any bdev NVMe has been created.
|
||||
|
||||
The parameters ctrlr_loss_timeout_sec, reconnect_delay_sec, and fast_io_fail_timeout_sec are for I/O error resiliency.
|
||||
They can be overridden if they are given by the RPC bdev_nvme_attach_controller.
|
||||
|
||||
#### Parameters
|
||||
|
||||
Name | Optional | Type | Description
|
||||
@ -2908,6 +2911,9 @@ delay_cmd_submit | Optional | boolean | Enable delaying NVMe comma
|
||||
transport_retry_count | Optional | number | The number of attempts per I/O in the transport layer before an I/O fails.
|
||||
bdev_retry_count | Optional | number | The number of attempts per I/O in the bdev layer before an I/O fails. -1 means infinite retries.
|
||||
transport_ack_timeout | Optional | number | Time to wait ack until packet retransmission. RDMA specific. Range 0-31 where 0 is driver-specific default value.
|
||||
ctrlr_loss_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before deleting ctrlr. -1 means infinite reconnects. 0 means no reconnect.
|
||||
reconnect_delay_sec | Optional | number | Time to delay a reconnect retry. 0 means no reconnect.
|
||||
fast_io_fail_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before failing I/O to ctrlr. 0 means no such timeout.
|
||||
|
||||
#### Example
|
||||
|
||||
|
@ -139,6 +139,9 @@ static struct spdk_bdev_nvme_opts g_opts = {
|
||||
.delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
|
||||
.bdev_retry_count = 3,
|
||||
.transport_ack_timeout = 0,
|
||||
.ctrlr_loss_timeout_sec = 0,
|
||||
.reconnect_delay_sec = 0,
|
||||
.fast_io_fail_timeout_sec = 0,
|
||||
};
|
||||
|
||||
#define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL
|
||||
@ -3474,6 +3477,15 @@ err:
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * Fill *opts with the default per-controller bdev NVMe options.
 *
 * prchk_flags is cleared to 0, and the three I/O error resiliency settings
 * (ctrlr_loss_timeout_sec, reconnect_delay_sec, fast_io_fail_timeout_sec)
 * are copied from the file-scope g_opts, so the values held there act as
 * the defaults for a controller.
 */
void
|
||||
bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts)
|
||||
{
|
||||
opts->prchk_flags = 0;
|
||||
opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;
|
||||
opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
|
||||
opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
|
||||
}
|
||||
|
||||
static void
|
||||
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
|
||||
struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
|
||||
@ -3587,6 +3599,10 @@ bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
|
||||
*opts = g_opts;
|
||||
}
|
||||
|
||||
static bool bdev_nvme_check_multipath_params(int32_t ctrlr_loss_timeout_sec,
|
||||
uint32_t reconnect_delay_sec,
|
||||
uint32_t fast_io_fail_timeout_sec);
|
||||
|
||||
static int
|
||||
bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
|
||||
{
|
||||
@ -3601,6 +3617,12 @@ bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!bdev_nvme_check_multipath_params(opts->ctrlr_loss_timeout_sec,
|
||||
opts->reconnect_delay_sec,
|
||||
opts->fast_io_fail_timeout_sec)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3979,6 +4001,8 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid,
|
||||
|
||||
if (bdev_opts) {
|
||||
memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
|
||||
} else {
|
||||
bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
|
||||
}
|
||||
|
||||
if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
|
||||
@ -5678,6 +5702,9 @@ bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
|
||||
spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
|
||||
spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
|
||||
spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
|
||||
spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
|
||||
spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
|
||||
spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
|
||||
spdk_json_write_object_end(w);
|
||||
|
||||
spdk_json_write_object_end(w);
|
||||
|
@ -251,6 +251,9 @@ struct spdk_bdev_nvme_opts {
|
||||
/* The number of attempts per I/O in the bdev layer before an I/O fails. */
|
||||
int32_t bdev_retry_count;
|
||||
uint8_t transport_ack_timeout;
|
||||
int32_t ctrlr_loss_timeout_sec;
|
||||
uint32_t reconnect_delay_sec;
|
||||
uint32_t fast_io_fail_timeout_sec;
|
||||
};
|
||||
|
||||
struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch);
|
||||
@ -258,6 +261,8 @@ void bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts);
|
||||
int bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts);
|
||||
int bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx);
|
||||
|
||||
void bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts);
|
||||
|
||||
int bdev_nvme_create(struct spdk_nvme_transport_id *trid,
|
||||
const char *base_name,
|
||||
const char **names,
|
||||
|
@ -92,6 +92,9 @@ static const struct spdk_json_object_decoder rpc_bdev_nvme_options_decoders[] =
|
||||
{"transport_retry_count", offsetof(struct spdk_bdev_nvme_opts, transport_retry_count), spdk_json_decode_uint32, true},
|
||||
{"bdev_retry_count", offsetof(struct spdk_bdev_nvme_opts, bdev_retry_count), spdk_json_decode_int32, true},
|
||||
{"transport_ack_timeout", offsetof(struct spdk_bdev_nvme_opts, transport_ack_timeout), spdk_json_decode_uint8, true},
|
||||
{"ctrlr_loss_timeout_sec", offsetof(struct spdk_bdev_nvme_opts, ctrlr_loss_timeout_sec), spdk_json_decode_int32, true},
|
||||
{"reconnect_delay_sec", offsetof(struct spdk_bdev_nvme_opts, reconnect_delay_sec), spdk_json_decode_uint32, true},
|
||||
{"fast_io_fail_timeout_sec", offsetof(struct spdk_bdev_nvme_opts, fast_io_fail_timeout_sec), spdk_json_decode_uint32, true},
|
||||
};
|
||||
|
||||
static void
|
||||
@ -324,6 +327,7 @@ rpc_bdev_nvme_attach_controller(struct spdk_jsonrpc_request *request,
|
||||
}
|
||||
|
||||
spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->req.drv_opts, sizeof(ctx->req.drv_opts));
|
||||
bdev_nvme_get_default_ctrlr_opts(&ctx->req.bdev_opts);
|
||||
|
||||
if (spdk_json_decode_object(params, rpc_bdev_nvme_attach_controller_decoders,
|
||||
SPDK_COUNTOF(rpc_bdev_nvme_attach_controller_decoders),
|
||||
|
@ -480,7 +480,10 @@ if __name__ == "__main__":
|
||||
delay_cmd_submit=args.delay_cmd_submit,
|
||||
transport_retry_count=args.transport_retry_count,
|
||||
bdev_retry_count=args.bdev_retry_count,
|
||||
transport_ack_timeout=args.transport_ack_timeout)
|
||||
transport_ack_timeout=args.transport_ack_timeout,
|
||||
ctrlr_loss_timeout_sec=args.ctrlr_loss_timeout_sec,
|
||||
reconnect_delay_sec=args.reconnect_delay_sec,
|
||||
fast_io_fail_timeout_sec=args.fast_io_fail_timeout_sec)
|
||||
|
||||
p = subparsers.add_parser('bdev_nvme_set_options', aliases=['set_bdev_nvme_options'],
|
||||
help='Set options for the bdev nvme type. This is startup command.')
|
||||
@ -518,6 +521,29 @@ if __name__ == "__main__":
|
||||
p.add_argument('-e', '--transport-ack-timeout',
|
||||
help="""Time to wait ack until packet retransmission. RDMA specific.
|
||||
Range 0-31 where 0 is driver-specific default value.""", type=int)
|
||||
p.add_argument('-l', '--ctrlr-loss-timeout-sec',
|
||||
help="""Time to wait until ctrlr is reconnected before deleting ctrlr.
|
||||
-1 means infinite reconnect retries. 0 means no reconnect retry.
|
||||
If reconnect_delay_sec is zero, ctrlr_loss_timeout_sec has to be zero.
|
||||
If reconnect_delay_sec is non-zero, ctrlr_loss_timeout_sec has to be -1 or not less than
|
||||
reconnect_delay_sec.
|
||||
This can be overridden by bdev_nvme_attach_controller.""",
|
||||
type=int)
|
||||
p.add_argument('-o', '--reconnect-delay-sec',
|
||||
help="""Time to delay a reconnect retry.
|
||||
If ctrlr_loss_timeout_sec is zero, reconnect_delay_sec has to be zero.
|
||||
If ctrlr_loss_timeout_sec is -1, reconnect_delay_sec has to be non-zero.
|
||||
If ctrlr_loss_timeout_sec is not -1 or zero, reconnect_delay_sec has to be non-zero and
|
||||
less than ctrlr_loss_timeout_sec.
|
||||
This can be overridden by bdev_nvme_attach_controller.""",
|
||||
type=int)
|
||||
p.add_argument('-u', '--fast-io-fail-timeout-sec',
|
||||
help="""Time to wait until ctrlr is reconnected before failing I/O to ctrlr.
|
||||
0 means no such timeout.
|
||||
If fast_io_fail_timeout_sec is not zero, it has to be not less than reconnect_delay_sec and
|
||||
less than ctrlr_loss_timeout_sec if ctrlr_loss_timeout_sec is not -1.
|
||||
This can be overridden by bdev_nvme_attach_controller.""",
|
||||
type=int)
|
||||
|
||||
p.set_defaults(func=bdev_nvme_set_options)
|
||||
|
||||
|
@ -443,7 +443,8 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo
|
||||
low_priority_weight=None, medium_priority_weight=None, high_priority_weight=None,
|
||||
nvme_adminq_poll_period_us=None, nvme_ioq_poll_period_us=None, io_queue_requests=None,
|
||||
delay_cmd_submit=None, transport_retry_count=None, bdev_retry_count=None,
|
||||
transport_ack_timeout=None):
|
||||
transport_ack_timeout=None, ctrlr_loss_timeout_sec=None, reconnect_delay_sec=None,
|
||||
fast_io_fail_timeout_sec=None):
|
||||
"""Set options for the bdev nvme. This is startup command.
|
||||
|
||||
Args:
|
||||
@ -464,6 +465,22 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo
|
||||
bdev_retry_count: The number of attempts per I/O in the bdev layer before an I/O fails. -1 means infinite retries. (optional)
|
||||
transport_ack_timeout: Time to wait ack until packet retransmission. RDMA specific.
|
||||
Range 0-31 where 0 is driver-specific default value (optional)
|
||||
ctrlr_loss_timeout_sec: Time to wait until ctrlr is reconnected before deleting ctrlr.
|
||||
-1 means infinite reconnect retries. 0 means no reconnect retry.
|
||||
If reconnect_delay_sec is zero, ctrlr_loss_timeout_sec has to be zero.
|
||||
If reconnect_delay_sec is non-zero, ctrlr_loss_timeout_sec has to be -1 or not less than reconnect_delay_sec.
|
||||
This can be overridden by bdev_nvme_attach_controller. (optional)
|
||||
reconnect_delay_sec: Time to delay a reconnect retry.
|
||||
If ctrlr_loss_timeout_sec is zero, reconnect_delay_sec has to be zero.
|
||||
If ctrlr_loss_timeout_sec is -1, reconnect_delay_sec has to be non-zero.
|
||||
If ctrlr_loss_timeout_sec is not -1 or zero, reconnect_delay_sec has to be non-zero and less than ctrlr_loss_timeout_sec.
|
||||
This can be overridden by bdev_nvme_attach_controller. (optional)
|
||||
fast_io_fail_timeout_sec: Time to wait until ctrlr is reconnected before failing I/O to ctrlr.
|
||||
0 means no such timeout.
|
||||
If fast_io_fail_timeout_sec is not zero, it has to be not less than reconnect_delay_sec and less than
|
||||
ctrlr_loss_timeout_sec if ctrlr_loss_timeout_sec is not -1.
|
||||
This can be overridden by bdev_nvme_attach_controller. (optional)
|
||||
|
||||
"""
|
||||
params = {}
|
||||
|
||||
@ -516,6 +533,15 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo
|
||||
if transport_ack_timeout is not None:
|
||||
params['transport_ack_timeout'] = transport_ack_timeout
|
||||
|
||||
if ctrlr_loss_timeout_sec is not None:
|
||||
params['ctrlr_loss_timeout_sec'] = ctrlr_loss_timeout_sec
|
||||
|
||||
if reconnect_delay_sec is not None:
|
||||
params['reconnect_delay_sec'] = reconnect_delay_sec
|
||||
|
||||
if fast_io_fail_timeout_sec is not None:
|
||||
params['fast_io_fail_timeout_sec'] = fast_io_fail_timeout_sec
|
||||
|
||||
return client.call('bdev_nvme_set_options', params)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user