diff --git a/CHANGELOG.md b/CHANGELOG.md index a529da6ff..bdd31913d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,9 @@ Add function pointers, `dump_device_stat_json` and `reset_device_stat` to the bd function table to display and reset I/O statistics specific for the module specific bdev context. +A new option `nvme_error_stat` was added to the `bdev_nvme_set_options` RPC to enable +collecting NVMe error counts. + ### event Added core lock file mechanism to prevent the same CPU cores from being used by multiple diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index ed14042eb..b9095836e 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -3429,6 +3429,7 @@ reconnect_delay_sec | Optional | number | Time to delay a reconnect fast_io_fail_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before failing I/O to ctrlr. 0 means no such timeout. disable_auto_failback | Optional | boolean | Disable automatic failback. The RPC bdev_nvme_set_preferred_path can be used to do manual failback. generate_uuids | Optional | boolean | Enable generation of UUIDs for NVMe bdevs that do not provide this value themselves. +nvme_error_stat | Optional | boolean | Enable collecting NVMe error counts. #### Example diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index d45532282..64d7f8ee8 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -124,6 +124,7 @@ static struct spdk_bdev_nvme_opts g_opts = { .disable_auto_failback = false, .generate_uuids = false, .transport_tos = 0, + .nvme_error_stat = false, }; #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL @@ -1018,6 +1019,40 @@ bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch, delay_ms * 1000ULL); } +static void +bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_bdev *nbdev; + uint16_t sct, sc; + + assert(spdk_nvme_cpl_is_error(cpl)); + + nbdev = bdev_io->bdev->ctxt; + + if (nbdev->err_stat == NULL) { + return; + } + + sct = cpl->status.sct; + sc = cpl->status.sc; + + pthread_mutex_lock(&nbdev->mutex); + + nbdev->err_stat->status_type[sct]++; + switch (sct) { + case SPDK_NVME_SCT_GENERIC: + case SPDK_NVME_SCT_COMMAND_SPECIFIC: + case SPDK_NVME_SCT_MEDIA_ERROR: + case SPDK_NVME_SCT_PATH: + nbdev->err_stat->status[sct][sc]++; + break; + default: + break; + } + + pthread_mutex_unlock(&nbdev->mutex); +} + static inline void bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio, const struct spdk_nvme_cpl *cpl) @@ -1034,6 +1069,11 @@ bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio, goto complete; } + /* Update error counts before deciding if retry is needed. + * Hence, error counts may be more than the number of I/O errors. + */ + bdev_nvme_update_nvme_error_stat(bdev_io, cpl); + if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) || (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) { goto complete; @@ -1348,6 +1388,7 @@ _bdev_nvme_unregister_dev_cb(void *io_device) struct nvme_bdev *nvme_disk = io_device; free(nvme_disk->disk.name); + free(nvme_disk->err_stat); free(nvme_disk); } @@ -2789,6 +2830,79 @@ bdev_nvme_get_spin_time(struct spdk_io_channel *ch) return (spin_time * 1000000ULL) / spdk_get_ticks_hz(); } +static void +bdev_nvme_reset_device_stat(void *ctx) +{ + struct nvme_bdev *nbdev = ctx; + + if (nbdev->err_stat != NULL) { + memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat)); + } +} + +/* JSON string should be lowercases and underscore delimited string. */ +static void +bdev_nvme_format_nvme_status(char *dst, const char *src) +{ + char tmp[256]; + + spdk_strcpy_replace(dst, 256, src, " - ", "_"); + spdk_strcpy_replace(tmp, 256, dst, "-", "_"); + spdk_strcpy_replace(dst, 256, tmp, " ", "_"); + spdk_strlwr(dst); +} + +static void +bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct nvme_bdev *nbdev = ctx; + struct spdk_nvme_status status = {}; + uint16_t sct, sc; + char status_json[256]; + const char *status_str; + + if (nbdev->err_stat == NULL) { + return; + } + + spdk_json_write_named_object_begin(w, "nvme_error"); + + spdk_json_write_named_object_begin(w, "status_type"); + for (sct = 0; sct < 8; sct++) { + if (nbdev->err_stat->status_type[sct] == 0) { + continue; + } + status.sct = sct; + + status_str = spdk_nvme_cpl_get_status_type_string(&status); + assert(status_str != NULL); + bdev_nvme_format_nvme_status(status_json, status_str); + + spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]); + } + spdk_json_write_object_end(w); + + spdk_json_write_named_object_begin(w, "status_code"); + for (sct = 0; sct < 4; sct++) { + status.sct = sct; + for (sc = 0; sc < 256; sc++) { + if (nbdev->err_stat->status[sct][sc] == 0) { + continue; + } + status.sc = sc; + + status_str = spdk_nvme_cpl_get_status_string(&status); + assert(status_str != NULL); + bdev_nvme_format_nvme_status(status_json, status_str); + + spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]); + } + } + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + static const struct spdk_bdev_fn_table nvmelib_fn_table = { .destruct = bdev_nvme_destruct, .submit_request = bdev_nvme_submit_request, @@ -2799,6 +2913,8 @@ static const struct spdk_bdev_fn_table nvmelib_fn_table = { .get_spin_time = bdev_nvme_get_spin_time, .get_module_ctx = bdev_nvme_get_module_ctx, .get_memory_domains = bdev_nvme_get_memory_domains, + .reset_device_stat = bdev_nvme_reset_device_stat, + .dump_device_stat_json = bdev_nvme_dump_device_stat_json, }; typedef int (*bdev_nvme_parse_ana_log_page_cb)( @@ -3150,8 +3266,18 @@ nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns) return -ENOMEM; } + if (g_opts.nvme_error_stat) { + bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat)); + if (!bdev->err_stat) { + SPDK_ERRLOG("err_stat calloc() failed\n"); + free(bdev); + return -ENOMEM; + } + } + rc = pthread_mutex_init(&bdev->mutex, NULL); if (rc != 0) { + free(bdev->err_stat); free(bdev); return rc; } @@ -3167,6 +3293,7 @@ nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns) if (rc != 0) { SPDK_ERRLOG("Failed to create NVMe disk\n"); pthread_mutex_destroy(&bdev->mutex); + free(bdev->err_stat); free(bdev); return rc; } @@ -3183,6 +3310,7 @@ nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns) spdk_io_device_unregister(bdev, NULL); pthread_mutex_destroy(&bdev->mutex); free(bdev->disk.name); + free(bdev->err_stat); free(bdev); return rc; } diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h index be6afa238..568d60995 100644 --- a/module/bdev/nvme/bdev_nvme.h +++ b/module/bdev/nvme/bdev_nvme.h @@ -143,6 +143,11 @@ struct nvme_bdev_ctrlr { TAILQ_ENTRY(nvme_bdev_ctrlr) tailq; }; +struct nvme_error_stat { + uint32_t status_type[8]; + uint32_t status[4][256]; +}; + struct nvme_bdev { struct spdk_bdev disk; uint32_t nsid; @@ -153,6 +158,7 @@ struct nvme_bdev { TAILQ_HEAD(, nvme_ns) nvme_ns_list; bool opal; TAILQ_ENTRY(nvme_bdev) tailq; + struct nvme_error_stat *err_stat; }; struct nvme_qpair { @@ -253,6 +259,7 @@ struct spdk_bdev_nvme_opts { bool generate_uuids; /* Type of Service - RDMA only */ uint8_t transport_tos; + bool nvme_error_stat; }; struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch); diff --git a/module/bdev/nvme/bdev_nvme_rpc.c b/module/bdev/nvme/bdev_nvme_rpc.c index 6ecda6723..e36702956 100644 --- a/module/bdev/nvme/bdev_nvme_rpc.c +++ b/module/bdev/nvme/bdev_nvme_rpc.c @@ -70,6 +70,7 @@ static const struct spdk_json_object_decoder rpc_bdev_nvme_options_decoders[] = {"disable_auto_failback", offsetof(struct spdk_bdev_nvme_opts, disable_auto_failback), spdk_json_decode_bool, true}, {"generate_uuids", offsetof(struct spdk_bdev_nvme_opts, generate_uuids), spdk_json_decode_bool, true}, {"transport_tos", offsetof(struct spdk_bdev_nvme_opts, transport_tos), spdk_json_decode_uint8, true}, + {"nvme_error_stat", offsetof(struct spdk_bdev_nvme_opts, nvme_error_stat), spdk_json_decode_bool, true}, }; static void diff --git a/python/spdk/rpc/bdev.py b/python/spdk/rpc/bdev.py index 2ddad0ded..8a357f870 100644 --- a/python/spdk/rpc/bdev.py +++ b/python/spdk/rpc/bdev.py @@ -532,7 +532,7 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo delay_cmd_submit=None, transport_retry_count=None, bdev_retry_count=None, transport_ack_timeout=None, ctrlr_loss_timeout_sec=None, reconnect_delay_sec=None, fast_io_fail_timeout_sec=None, disable_auto_failback=None, generate_uuids=None, - transport_tos=None): + transport_tos=None, nvme_error_stat=None): """Set options for the bdev nvme. This is startup command. Args: @@ -574,6 +574,7 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo These strings are based on device serial number and namespace ID and will always be the same for that device. transport_tos: IPv4 Type of Service value. Only applicable for RDMA transports. The default is 0 which means no TOS is applied. (optional) + nvme_error_stat: Enable collecting NVMe error counts. (optional) """ params = {} @@ -645,6 +646,9 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo if transport_tos is not None: params['transport_tos'] = transport_tos + if nvme_error_stat is not None: + params['nvme_error_stat'] = nvme_error_stat + return client.call('bdev_nvme_set_options', params) diff --git a/scripts/rpc.py b/scripts/rpc.py index 65c849a84..16e92049c 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -559,7 +559,8 @@ if __name__ == "__main__": fast_io_fail_timeout_sec=args.fast_io_fail_timeout_sec, disable_auto_failback=args.disable_auto_failback, generate_uuids=args.generate_uuids, - transport_tos=args.transport_tos) + transport_tos=args.transport_tos, + nvme_error_stat=args.nvme_error_stat) p = subparsers.add_parser('bdev_nvme_set_options', help='Set options for the bdev nvme type. This is startup command.') @@ -631,6 +632,7 @@ if __name__ == "__main__": p.add_argument('--transport-tos', help="""IPv4 Type of Service value. Only applicable for RDMA transports. The default is 0 which means no TOS is applied.""", type=int) + p.add_argument('-m', '--nvme-error-stat', help="Enable collecting NVMe error counts.", action='store_true') p.set_defaults(func=bdev_nvme_set_options) diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index 26ef19fda..45c6353f1 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -184,6 +184,12 @@ DEFINE_STUB(spdk_nvme_zns_offline_zone, int, (struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, uint64_t slba, bool select_all, spdk_nvme_cmd_cb cb_fn, void *cb_arg), 0); +DEFINE_STUB(spdk_nvme_cpl_get_status_type_string, const char *, + (const struct spdk_nvme_status *status), NULL); + +DEFINE_STUB(spdk_nvme_cpl_get_status_string, const char *, + (const struct spdk_nvme_status *status), NULL); + DEFINE_STUB_V(spdk_bdev_module_fini_done, (void)); DEFINE_STUB_V(spdk_bdev_module_list_add, (struct spdk_bdev_module *bdev_module));