From e33ae4a6d57a782c840c6ba85efc8ed27ee8d13b Mon Sep 17 00:00:00 2001 From: Shuhei Matsumoto Date: Fri, 6 Jan 2023 08:26:33 +0900 Subject: [PATCH] bdev/nvme: Count number of NVMe errors per type or code Error counters for NVMe error was added in the generic bdev layer but we want to know more detailed information for some use cases. Add NVMe error counters per type and per code as module specific statistics. For status codes, the first idea was to have different named member for each status code value. However, it was bad and too hard to test, review, and maintain. Instead, we have just two dimensional uint32_t arrays, and increment one of these uint32_t values based on the status code type and status code. Then, when dump the JSON, we use spdk_nvme_cpl_get_status_string() and spdk_nvme_cpl_get_status_type_string(). This idea has one potential downside. This idea consumes 4 (types) * 256 (codes) * 4 (counter) = 4KB per NVMe bdev. We can make this smarter if memory allocation is a problem. Hence we add an option nvme_error_stat to enable this feature only if the user requests. Additionally, the string returned by spdk_nvme_cpl_get_status_string() or spdk_nvme_cpl_get_status_type_string() has uppercases, spaces, and hyphens. These should not be included in JSON strings. Hence, convert these via spdk_strcpy_replace(). Signed-off-by: Shuhei Matsumoto Change-Id: I07b07621e777bdf6556b95054abbbb65e5f9ea3e Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15370 Reviewed-by: Jim Harris Reviewed-by: Konrad Sztyber Reviewed-by: Aleksey Marchuk Tested-by: SPDK CI Jenkins Community-CI: Mellanox Build Bot --- CHANGELOG.md | 3 + doc/jsonrpc.md | 1 + module/bdev/nvme/bdev_nvme.c | 128 ++++++++++++++++++ module/bdev/nvme/bdev_nvme.h | 7 + module/bdev/nvme/bdev_nvme_rpc.c | 1 + python/spdk/rpc/bdev.py | 6 +- scripts/rpc.py | 4 +- .../lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c | 6 + 8 files changed, 154 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a529da6ff..bdd31913d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,9 @@ Add function pointers, `dump_device_stat_json` and `reset_device_stat` to the bd function table to display and reset I/O statistics specific for the module specific bdev context. +A new option `nvme_error_stat` was added to the `bdev_nvme_set_options` RPC to enable +collecting NVMe error counts. + ### event Added core lock file mechanism to prevent the same CPU cores from being used by multiple diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index ed14042eb..b9095836e 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -3429,6 +3429,7 @@ reconnect_delay_sec | Optional | number | Time to delay a reconnect fast_io_fail_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before failing I/O to ctrlr. 0 means no such timeout. disable_auto_failback | Optional | boolean | Disable automatic failback. The RPC bdev_nvme_set_preferred_path can be used to do manual failback. generate_uuids | Optional | boolean | Enable generation of UUIDs for NVMe bdevs that do not provide this value themselves. +nvme_error_stat | Optional | boolean | Enable collecting NVMe error counts. #### Example diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index d45532282..64d7f8ee8 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -124,6 +124,7 @@ static struct spdk_bdev_nvme_opts g_opts = { .disable_auto_failback = false, .generate_uuids = false, .transport_tos = 0, + .nvme_error_stat = false, }; #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL @@ -1018,6 +1019,40 @@ bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch, delay_ms * 1000ULL); } +static void +bdev_nvme_update_nvme_error_stat(struct spdk_bdev_io *bdev_io, const struct spdk_nvme_cpl *cpl) +{ + struct nvme_bdev *nbdev; + uint16_t sct, sc; + + assert(spdk_nvme_cpl_is_error(cpl)); + + nbdev = bdev_io->bdev->ctxt; + + if (nbdev->err_stat == NULL) { + return; + } + + sct = cpl->status.sct; + sc = cpl->status.sc; + + pthread_mutex_lock(&nbdev->mutex); + + nbdev->err_stat->status_type[sct]++; + switch (sct) { + case SPDK_NVME_SCT_GENERIC: + case SPDK_NVME_SCT_COMMAND_SPECIFIC: + case SPDK_NVME_SCT_MEDIA_ERROR: + case SPDK_NVME_SCT_PATH: + nbdev->err_stat->status[sct][sc]++; + break; + default: + break; + } + + pthread_mutex_unlock(&nbdev->mutex); +} + static inline void bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio, const struct spdk_nvme_cpl *cpl) @@ -1034,6 +1069,11 @@ bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio, goto complete; } + /* Update error counts before deciding if retry is needed. + * Hence, error counts may be more than the number of I/O errors. + */ + bdev_nvme_update_nvme_error_stat(bdev_io, cpl); + if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) || (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) { goto complete; @@ -1348,6 +1388,7 @@ _bdev_nvme_unregister_dev_cb(void *io_device) struct nvme_bdev *nvme_disk = io_device; free(nvme_disk->disk.name); + free(nvme_disk->err_stat); free(nvme_disk); } @@ -2789,6 +2830,79 @@ bdev_nvme_get_spin_time(struct spdk_io_channel *ch) return (spin_time * 1000000ULL) / spdk_get_ticks_hz(); } +static void +bdev_nvme_reset_device_stat(void *ctx) +{ + struct nvme_bdev *nbdev = ctx; + + if (nbdev->err_stat != NULL) { + memset(nbdev->err_stat, 0, sizeof(struct nvme_error_stat)); + } +} + +/* JSON string should be lowercases and underscore delimited string. */ +static void +bdev_nvme_format_nvme_status(char *dst, const char *src) +{ + char tmp[256]; + + spdk_strcpy_replace(dst, 256, src, " - ", "_"); + spdk_strcpy_replace(tmp, 256, dst, "-", "_"); + spdk_strcpy_replace(dst, 256, tmp, " ", "_"); + spdk_strlwr(dst); +} + +static void +bdev_nvme_dump_device_stat_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct nvme_bdev *nbdev = ctx; + struct spdk_nvme_status status = {}; + uint16_t sct, sc; + char status_json[256]; + const char *status_str; + + if (nbdev->err_stat == NULL) { + return; + } + + spdk_json_write_named_object_begin(w, "nvme_error"); + + spdk_json_write_named_object_begin(w, "status_type"); + for (sct = 0; sct < 8; sct++) { + if (nbdev->err_stat->status_type[sct] == 0) { + continue; + } + status.sct = sct; + + status_str = spdk_nvme_cpl_get_status_type_string(&status); + assert(status_str != NULL); + bdev_nvme_format_nvme_status(status_json, status_str); + + spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status_type[sct]); + } + spdk_json_write_object_end(w); + + spdk_json_write_named_object_begin(w, "status_code"); + for (sct = 0; sct < 4; sct++) { + status.sct = sct; + for (sc = 0; sc < 256; sc++) { + if (nbdev->err_stat->status[sct][sc] == 0) { + continue; + } + status.sc = sc; + + status_str = spdk_nvme_cpl_get_status_string(&status); + assert(status_str != NULL); + bdev_nvme_format_nvme_status(status_json, status_str); + + spdk_json_write_named_uint32(w, status_json, nbdev->err_stat->status[sct][sc]); + } + } + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + static const struct spdk_bdev_fn_table nvmelib_fn_table = { .destruct = bdev_nvme_destruct, .submit_request = bdev_nvme_submit_request, @@ -2799,6 +2913,8 @@ static const struct spdk_bdev_fn_table nvmelib_fn_table = { .get_spin_time = bdev_nvme_get_spin_time, .get_module_ctx = bdev_nvme_get_module_ctx, .get_memory_domains = bdev_nvme_get_memory_domains, + .reset_device_stat = bdev_nvme_reset_device_stat, + .dump_device_stat_json = bdev_nvme_dump_device_stat_json, }; typedef int (*bdev_nvme_parse_ana_log_page_cb)( @@ -3150,8 +3266,18 @@ nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns) return -ENOMEM; } + if (g_opts.nvme_error_stat) { + bdev->err_stat = calloc(1, sizeof(struct nvme_error_stat)); + if (!bdev->err_stat) { + SPDK_ERRLOG("err_stat calloc() failed\n"); + free(bdev); + return -ENOMEM; + } + } + rc = pthread_mutex_init(&bdev->mutex, NULL); if (rc != 0) { + free(bdev->err_stat); free(bdev); return rc; } @@ -3167,6 +3293,7 @@ nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns) if (rc != 0) { SPDK_ERRLOG("Failed to create NVMe disk\n"); pthread_mutex_destroy(&bdev->mutex); + free(bdev->err_stat); free(bdev); return rc; } @@ -3183,6 +3310,7 @@ nvme_bdev_create(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns) spdk_io_device_unregister(bdev, NULL); pthread_mutex_destroy(&bdev->mutex); free(bdev->disk.name); + free(bdev->err_stat); free(bdev); return rc; } diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h index be6afa238..568d60995 100644 --- a/module/bdev/nvme/bdev_nvme.h +++ b/module/bdev/nvme/bdev_nvme.h @@ -143,6 +143,11 @@ struct nvme_bdev_ctrlr { TAILQ_ENTRY(nvme_bdev_ctrlr) tailq; }; +struct nvme_error_stat { + uint32_t status_type[8]; + uint32_t status[4][256]; +}; + struct nvme_bdev { struct spdk_bdev disk; uint32_t nsid; @@ -153,6 +158,7 @@ struct nvme_bdev { TAILQ_HEAD(, nvme_ns) nvme_ns_list; bool opal; TAILQ_ENTRY(nvme_bdev) tailq; + struct nvme_error_stat *err_stat; }; struct nvme_qpair { @@ -253,6 +259,7 @@ struct spdk_bdev_nvme_opts { bool generate_uuids; /* Type of Service - RDMA only */ uint8_t transport_tos; + bool nvme_error_stat; }; struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch); diff --git a/module/bdev/nvme/bdev_nvme_rpc.c b/module/bdev/nvme/bdev_nvme_rpc.c index 6ecda6723..e36702956 100644 --- a/module/bdev/nvme/bdev_nvme_rpc.c +++ b/module/bdev/nvme/bdev_nvme_rpc.c @@ -70,6 +70,7 @@ static const struct spdk_json_object_decoder rpc_bdev_nvme_options_decoders[] = {"disable_auto_failback", offsetof(struct spdk_bdev_nvme_opts, disable_auto_failback), spdk_json_decode_bool, true}, {"generate_uuids", offsetof(struct spdk_bdev_nvme_opts, generate_uuids), spdk_json_decode_bool, true}, {"transport_tos", offsetof(struct spdk_bdev_nvme_opts, transport_tos), spdk_json_decode_uint8, true}, + {"nvme_error_stat", offsetof(struct spdk_bdev_nvme_opts, nvme_error_stat), spdk_json_decode_bool, true}, }; static void diff --git a/python/spdk/rpc/bdev.py b/python/spdk/rpc/bdev.py index 2ddad0ded..8a357f870 100644 --- a/python/spdk/rpc/bdev.py +++ b/python/spdk/rpc/bdev.py @@ -532,7 +532,7 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo delay_cmd_submit=None, transport_retry_count=None, bdev_retry_count=None, transport_ack_timeout=None, ctrlr_loss_timeout_sec=None, reconnect_delay_sec=None, fast_io_fail_timeout_sec=None, disable_auto_failback=None, generate_uuids=None, - transport_tos=None): + transport_tos=None, nvme_error_stat=None): """Set options for the bdev nvme. This is startup command. Args: @@ -574,6 +574,7 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo These strings are based on device serial number and namespace ID and will always be the same for that device. transport_tos: IPv4 Type of Service value. Only applicable for RDMA transports. The default is 0 which means no TOS is applied. (optional) + nvme_error_stat: Enable collecting NVMe error counts. (optional) """ params = {} @@ -645,6 +646,9 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo if transport_tos is not None: params['transport_tos'] = transport_tos + if nvme_error_stat is not None: + params['nvme_error_stat'] = nvme_error_stat + return client.call('bdev_nvme_set_options', params) diff --git a/scripts/rpc.py b/scripts/rpc.py index 65c849a84..16e92049c 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -559,7 +559,8 @@ if __name__ == "__main__": fast_io_fail_timeout_sec=args.fast_io_fail_timeout_sec, disable_auto_failback=args.disable_auto_failback, generate_uuids=args.generate_uuids, - transport_tos=args.transport_tos) + transport_tos=args.transport_tos, + nvme_error_stat=args.nvme_error_stat) p = subparsers.add_parser('bdev_nvme_set_options', help='Set options for the bdev nvme type. This is startup command.') @@ -631,6 +632,7 @@ if __name__ == "__main__": p.add_argument('--transport-tos', help="""IPv4 Type of Service value. Only applicable for RDMA transports. The default is 0 which means no TOS is applied.""", type=int) + p.add_argument('-m', '--nvme-error-stat', help="Enable collecting NVMe error counts.", action='store_true') p.set_defaults(func=bdev_nvme_set_options) diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index 26ef19fda..45c6353f1 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -184,6 +184,12 @@ DEFINE_STUB(spdk_nvme_zns_offline_zone, int, (struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, uint64_t slba, bool select_all, spdk_nvme_cmd_cb cb_fn, void *cb_arg), 0); +DEFINE_STUB(spdk_nvme_cpl_get_status_type_string, const char *, + (const struct spdk_nvme_status *status), NULL); + +DEFINE_STUB(spdk_nvme_cpl_get_status_string, const char *, + (const struct spdk_nvme_status *status), NULL); + DEFINE_STUB_V(spdk_bdev_module_fini_done, (void)); DEFINE_STUB_V(spdk_bdev_module_list_add, (struct spdk_bdev_module *bdev_module));