bdev_xnvme: add option to conserve cpu and set default to false

To achieve the highest possible IO-rates and lowest latency, CPU cycles
must be spent processing IO. This commit introduces three different
techniques, depending on the 'io_mechanism' used.

For 'libaio', xNVMe is instructed via 'opts.poll_io' to not wait for
completions, so xnvme_queue_poke() returns immediately; the user can
then call xnvme_queue_poke() as frequently as desired. This requires
xNVMe v0.5.0. Earlier versions will ignore 'opts.poll_io' for 'libaio'.

For 'io_uring', xNVMe is instructed via 'opts.poll_io' to enable
IORING_SETUP_IOPOLL.

For 'io_uring_cmd', xNVMe is instructed via 'opts.poll_sq' to enable
IORING_SETUP_SQPOLL; this sets up a kernel-side thread reaping
completions from user-space on the ring. Note that 'io_uring_cmd'
requires liburing >= 2.2.

This commit enables the above by default. The above can be disabled by
setting 'conserve_cpu' to true on the bdev-setup.

Signed-off-by: Simon A. F. Lund <simon.lund@samsung.com>
Change-Id: Id54f1e59733ce9ae3b174ad4562904d868d4ef4f
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14678
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
This commit is contained in:
Simon A. F. Lund 2022-09-25 11:55:53 +02:00 committed by Tomasz Zawadzki
parent 877573897a
commit b99b00e595
6 changed files with 29 additions and 10 deletions

View File

@ -5737,6 +5737,7 @@ Name | Optional | Type | Description
name | Required | string | name of xNVMe bdev to create name | Required | string | name of xNVMe bdev to create
filename | Required | string | path to device or file (ex: /dev/nvme0n1) filename | Required | string | path to device or file (ex: /dev/nvme0n1)
io_mechanism | Required | string | IO mechanism to use (ex: libaio, io_uring, io_uring_cmd, etc.) io_mechanism | Required | string | IO mechanism to use (ex: libaio, io_uring, io_uring_cmd, etc.)
conserve_cpu | Optional | boolean | Whether or not to conserve CPU when polling (default: false)
#### Result #### Result
@ -5754,7 +5755,8 @@ Example request:
"params": { "params": {
"name": "bdev_ng0n1", "name": "bdev_ng0n1",
"filename": "/dev/ng0n1", "filename": "/dev/ng0n1",
"io_mechanism": "io_uring_cmd" "io_mechanism": "io_uring_cmd",
"conserve_cpu": false
} }
} }
~~~ ~~~

View File

@ -39,6 +39,7 @@ struct bdev_xnvme {
char *io_mechanism; char *io_mechanism;
struct xnvme_dev *dev; struct xnvme_dev *dev;
uint32_t nsid; uint32_t nsid;
bool conserve_cpu;
TAILQ_ENTRY(bdev_xnvme) link; TAILQ_ENTRY(bdev_xnvme) link;
}; };
@ -68,6 +69,7 @@ bdev_xnvme_config_json(struct spdk_json_write_ctx *w)
spdk_json_write_named_string(w, "name", xnvme->bdev.name); spdk_json_write_named_string(w, "name", xnvme->bdev.name);
spdk_json_write_named_string(w, "filename", xnvme->filename); spdk_json_write_named_string(w, "filename", xnvme->filename);
spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism); spdk_json_write_named_string(w, "io_mechanism", xnvme->io_mechanism);
spdk_json_write_named_bool(w, "conserve_cpu", xnvme->conserve_cpu);
spdk_json_write_object_end(w); spdk_json_write_object_end(w);
spdk_json_write_object_end(w); spdk_json_write_object_end(w);
@ -304,7 +306,8 @@ bdev_xnvme_queue_destroy_cb(void *io_device, void *ctx_buf)
} }
struct spdk_bdev * struct spdk_bdev *
create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism) create_xnvme_bdev(const char *name, const char *filename, const char *io_mechanism,
bool conserve_cpu)
{ {
struct bdev_xnvme *xnvme; struct bdev_xnvme *xnvme;
uint32_t block_size; uint32_t block_size;
@ -328,8 +331,14 @@ create_xnvme_bdev(const char *name, const char *filename, const char *io_mechani
goto error_return; goto error_return;
} }
if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) { if (!conserve_cpu) {
opts.poll_sq = 1; if (!strcmp(xnvme->io_mechanism, "libaio")) {
opts.poll_io = 1;
} else if (!strcmp(xnvme->io_mechanism, "io_uring")) {
opts.poll_io = 1;
} else if (!strcmp(xnvme->io_mechanism, "io_uring_cmd")) {
opts.poll_sq = 1;
}
} }
xnvme->filename = strdup(filename); xnvme->filename = strdup(filename);

View File

@ -16,7 +16,7 @@
typedef void (*spdk_delete_xnvme_complete)(void *cb_arg, int bdeverrno); typedef void (*spdk_delete_xnvme_complete)(void *cb_arg, int bdeverrno);
struct spdk_bdev *create_xnvme_bdev(const char *name, const char *filename, struct spdk_bdev *create_xnvme_bdev(const char *name, const char *filename,
const char *io_mechanism); const char *io_mechanism, bool conserve_cpu);
void delete_xnvme_bdev(struct spdk_bdev *bdev, spdk_delete_xnvme_complete cb_fn, void *cb_arg); void delete_xnvme_bdev(struct spdk_bdev *bdev, spdk_delete_xnvme_complete cb_fn, void *cb_arg);

View File

@ -14,6 +14,7 @@ struct rpc_create_xnvme {
char *name; char *name;
char *filename; char *filename;
char *io_mechanism; char *io_mechanism;
bool conserve_cpu;
}; };
/* Free the allocated memory resource after the RPC handling. */ /* Free the allocated memory resource after the RPC handling. */
@ -30,6 +31,7 @@ static const struct spdk_json_object_decoder rpc_create_xnvme_decoders[] = {
{"name", offsetof(struct rpc_create_xnvme, name), spdk_json_decode_string}, {"name", offsetof(struct rpc_create_xnvme, name), spdk_json_decode_string},
{"filename", offsetof(struct rpc_create_xnvme, filename), spdk_json_decode_string}, {"filename", offsetof(struct rpc_create_xnvme, filename), spdk_json_decode_string},
{"io_mechanism", offsetof(struct rpc_create_xnvme, io_mechanism), spdk_json_decode_string}, {"io_mechanism", offsetof(struct rpc_create_xnvme, io_mechanism), spdk_json_decode_string},
{"conserve_cpu", offsetof(struct rpc_create_xnvme, conserve_cpu), spdk_json_decode_bool, true},
}; };
static void static void
@ -57,7 +59,7 @@ rpc_bdev_xnvme_create(struct spdk_jsonrpc_request *request,
goto cleanup; goto cleanup;
} }
bdev = create_xnvme_bdev(req.name, req.filename, req.io_mechanism); bdev = create_xnvme_bdev(req.name, req.filename, req.io_mechanism, req.conserve_cpu);
if (!bdev) { if (!bdev) {
SPDK_ERRLOG("Unable to create xNVMe bdev from file %s\n", req.filename); SPDK_ERRLOG("Unable to create xNVMe bdev from file %s\n", req.filename);
spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,

View File

@ -492,20 +492,25 @@ def bdev_uring_delete(client, name):
return client.call('bdev_uring_delete', params) return client.call('bdev_uring_delete', params)
def bdev_xnvme_create(client, filename, name, io_mechanism): def bdev_xnvme_create(client, filename, name, io_mechanism, conserve_cpu=None):
"""Create a bdev with xNVMe backend. """Create a bdev with xNVMe backend.
Args: Args:
filename: path to device or file (ex: /dev/nvme0n1) filename: path to device or file (ex: /dev/nvme0n1)
name: name of xNVMe bdev to create name: name of xNVMe bdev to create
io_mechanism: I/O mechanism to use (ex: io_uring, io_uring_cmd, etc.) io_mechanism: I/O mechanism to use (ex: io_uring, io_uring_cmd, etc.)
conserve_cpu: Whether or not to conserve CPU when polling (default: False)
Returns: Returns:
Name of created bdev. Name of created bdev.
""" """
params = {'name': name, params = {
'filename': filename, 'name': name,
'io_mechanism': io_mechanism} 'filename': filename,
'io_mechanism': io_mechanism,
}
if conserve_cpu:
params['conserve_cpu'] = conserve_cpu
return client.call('bdev_xnvme_create', params) return client.call('bdev_xnvme_create', params)

View File

@ -525,6 +525,7 @@ if __name__ == "__main__":
p.add_argument('filename', help='Path to device or file (ex: /dev/nvme0n1)') p.add_argument('filename', help='Path to device or file (ex: /dev/nvme0n1)')
p.add_argument('name', help='name of xNVMe bdev to create') p.add_argument('name', help='name of xNVMe bdev to create')
p.add_argument('io_mechanism', help='IO mechanism to use (ex: libaio, io_uring, io_uring_cmd, etc.)') p.add_argument('io_mechanism', help='IO mechanism to use (ex: libaio, io_uring, io_uring_cmd, etc.)')
p.add_argument('conserve_cpu', action='store_true', help='Whether or not to conserve CPU when polling')
p.set_defaults(func=bdev_xnvme_create) p.set_defaults(func=bdev_xnvme_create)
def bdev_xnvme_delete(args): def bdev_xnvme_delete(args):