From b99b00e59579bb7a44c8d84f8befdfb3d2ec4dd4 Mon Sep 17 00:00:00 2001 From: "Simon A. F. Lund" Date: Sun, 25 Sep 2022 11:55:53 +0200 Subject: [PATCH] bdev_xnvme: add option to conserve cpu and set default to false To achieve the highest possible IO-rates and lowest latency, CPU cycles must be spent processing IO. This commit introduces three different techniques dependent on the 'io_mechanism' used. For 'libaio', xNVMe is instructed via 'opts.poll_io' to not wait for completions, thus xnvme_queue_poke() returns immediately; the user can then call xnvme_queue_poke() as frequently as desired. This requires xNVMe v0.5.0. Earlier versions will ignore 'opts.poll_io' for 'libaio'. For 'io_uring', xNVMe is instructed via 'opts.poll_io' to enable IORING_SETUP_IOPOLL. For 'io_uring_cmd', xNVMe is instructed via 'opts.poll_sq' to enable IORING_SETUP_SQPOLL; this sets up a kernel-side thread reaping completions from user-space on the ring. Note that 'io_uring_cmd' requires liburing >= 2.2. This commit enables the above by default. The above can be disabled by setting 'conserve_cpu' to true on the bdev-setup. Signed-off-by: Simon A. F. 
Lund Change-Id: Id54f1e59733ce9ae3b174ad4562904d868d4ef4f Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14678 Tested-by: SPDK CI Jenkins Reviewed-by: Jim Harris Reviewed-by: Tomasz Zawadzki --- doc/jsonrpc.md | 4 +++- module/bdev/xnvme/bdev_xnvme.c | 15 ++++++++++++--- module/bdev/xnvme/bdev_xnvme.h | 2 +- module/bdev/xnvme/bdev_xnvme_rpc.c | 4 +++- python/spdk/rpc/bdev.py | 13 +++++++++---- scripts/rpc.py | 1 + 6 files changed, 29 insertions(+), 10 deletions(-) diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index dd7c75503..ed14042eb 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -5737,6 +5737,7 @@ Name | Optional | Type | Description name | Required | string | name of xNVMe bdev to create filename | Required | string | path to device or file (ex: /dev/nvme0n1) io_mechanism | Required | string | IO mechanism to use (ex: libaio, io_uring, io_uring_cmd, etc.) +conserve_cpu | Optional | boolean | Whether or not to conserve CPU when polling (default: false) #### Result @@ -5754,7 +5755,8 @@ Example request: "params": { "name": "bdev_ng0n1", "filename": "/dev/ng0n1", - "io_mechanism": "io_uring_cmd" + "io_mechanism": "io_uring_cmd", + "conserve_cpu": false } } ~~~ diff --git a/module/bdev/xnvme/bdev_xnvme.c b/module/bdev/xnvme/bdev_xnvme.c index e75f039a9..256082c99 100644 --- a/module/bdev/xnvme/bdev_xnvme.c +++ b/module/bdev/xnvme/bdev_xnvme.c @@ -39,6 +39,7 @@ struct bdev_xnvme { char *io_mechanism; struct xnvme_dev *dev; uint32_t nsid; + bool conserve_cpu; TAILQ_ENTRY(bdev_xnvme) link; }; @@ -68,6 +69,7 @@ bdev_xnvme_config_json(struct spdk_json_write_ctx *w) spdk_json_write_named_string(w, "name", xnvme->bdev.name); spdk_json_write_named_string(w, "filename", xnvme->filename); spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism); + spdk_json_write_named_bool(w, "conserve_cpu", xnvme->conserve_cpu); spdk_json_write_object_end(w); spdk_json_write_object_end(w); @@ -304,7 +306,8 @@ bdev_xnvme_queue_destroy_cb(void *io_device, void 
*ctx_buf) } struct spdk_bdev * -create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism) +create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism, + bool conserve_cpu) { struct bdev_xnvme *xnvme; uint32_t block_size; @@ -328,8 +331,14 @@ create_xnvme_bdev(const char *name, const char *filename, const char *io_mechani goto error_return; } - if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) { - opts.poll_sq = 1; + if (!conserve_cpu) { + if (!strcmp(xnvme->io_mechanism, "libaio")) { + opts.poll_io = 1; + } else if (!strcmp(xnvme->io_mechanism, "io_uring")) { + opts.poll_io = 1; + } else if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) { + opts.poll_sq = 1; + } } xnvme->filename = strdup(filename); diff --git a/module/bdev/xnvme/bdev_xnvme.h b/module/bdev/xnvme/bdev_xnvme.h index d8305b522..dbe2a6f00 100644 --- a/module/bdev/xnvme/bdev_xnvme.h +++ b/module/bdev/xnvme/bdev_xnvme.h @@ -16,7 +16,7 @@ typedef void (*spdk_delete_xnvme_complete)(void *cb_arg, int bdeverrno); struct spdk_bdev *create_xnvme_bdev(const char *name, const char *filename, - const char *io_mechanism); + const char *io_mechanism, bool conserve_cpu); void delete_xnvme_bdev(struct spdk_bdev *bdev, spdk_delete_xnvme_complete cb_fn, void *cb_arg); diff --git a/module/bdev/xnvme/bdev_xnvme_rpc.c b/module/bdev/xnvme/bdev_xnvme_rpc.c index d57429f50..f69867895 100644 --- a/module/bdev/xnvme/bdev_xnvme_rpc.c +++ b/module/bdev/xnvme/bdev_xnvme_rpc.c @@ -14,6 +14,7 @@ struct rpc_create_xnvme { char *name; char *filename; char *io_mechanism; + bool conserve_cpu; }; /* Free the allocated memory resource after the RPC handling. 
*/ @@ -30,6 +31,7 @@ static const struct spdk_json_object_decoder rpc_create_xnvme_decoders[] = { {"name", offsetof(struct rpc_create_xnvme, name), spdk_json_decode_string}, {"filename", offsetof(struct rpc_create_xnvme, filename), spdk_json_decode_string}, {"io_mechanism", offsetof(struct rpc_create_xnvme, io_mechanism), spdk_json_decode_string}, + {"conserve_cpu", offsetof(struct rpc_create_xnvme, conserve_cpu), spdk_json_decode_bool, true}, }; static void @@ -57,7 +59,7 @@ rpc_bdev_xnvme_create(struct spdk_jsonrpc_request *request, goto cleanup; } - bdev = create_xnvme_bdev(req.name, req.filename, req.io_mechanism); + bdev = create_xnvme_bdev(req.name, req.filename, req.io_mechanism, req.conserve_cpu); if (!bdev) { SPDK_ERRLOG("Unable to create xNVMe bdev from file %s\n", req.filename); spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, diff --git a/python/spdk/rpc/bdev.py b/python/spdk/rpc/bdev.py index 73cb90de6..d2d58303f 100644 --- a/python/spdk/rpc/bdev.py +++ b/python/spdk/rpc/bdev.py @@ -492,20 +492,25 @@ def bdev_uring_delete(client, name): return client.call('bdev_uring_delete', params) -def bdev_xnvme_create(client, filename, name, io_mechanism): +def bdev_xnvme_create(client, filename, name, io_mechanism, conserve_cpu=None): """Create a bdev with xNVMe backend. Args: filename: path to device or file (ex: /dev/nvme0n1) name: name of xNVMe bdev to create io_mechanism: I/O mechanism to use (ex: io_uring, io_uring_cmd, etc.) + conserve_cpu: Whether or not to conserve CPU when polling (default: False) Returns: Name of created bdev. 
""" - params = {'name': name, - 'filename': filename, - 'io_mechanism': io_mechanism} + params = { + 'name': name, + 'filename': filename, + 'io_mechanism': io_mechanism, + } + if conserve_cpu: + params['conserve_cpu'] = conserve_cpu return client.call('bdev_xnvme_create', params) diff --git a/scripts/rpc.py b/scripts/rpc.py index b1535b717..a3f3fc962 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -525,6 +525,7 @@ if __name__ == "__main__": p.add_argument('filename', help='Path to device or file (ex: /dev/nvme0n1)') p.add_argument('name', help='name of xNVMe bdev to create') p.add_argument('io_mechanism', help='IO mechanism to use (ex: libaio, io_uring, io_uring_cmd, etc.)') + p.add_argument('conserve_cpu', action='store_true', help='Whether or not to conserve CPU when polling') p.set_defaults(func=bdev_xnvme_create) def bdev_xnvme_delete(args):