bdev/malloc: Support both interleaved and separated metadata
With this patch, the malloc bdev module supports both interleaved and separated metadata. Unlike the NULL bdev module, opts->block_size is the data block size, and the bdev block size is calculated internally as the sum of opts->block_size and opts->md_size if opts->md_interleave is true, or as opts->block_size otherwise. This is more intuitive. Additionally, opts->md_size accepts only 0, 8, 16, 32, 64, or 128. Protection information (T10 DIF/DIX) will be supported in the following patches. Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com> Change-Id: Icd9e92c8ea94e30139e416f8c533ab4cf473d2a8 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14984 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com> Community-CI: Mellanox Build Bot
This commit is contained in:
parent
e6b2b9075a
commit
aef00d4420
@ -2,6 +2,10 @@
|
||||
|
||||
## v23.01: (Upcoming Release)
|
||||
|
||||
### bdev
|
||||
|
||||
Both interleaved and separated metadata are now supported by the malloc bdev module.
|
||||
|
||||
### scheduler
|
||||
|
||||
Changing scheduler from dynamic back to static is no longer possible,
|
||||
|
@ -2790,10 +2790,12 @@ Construct @ref bdev_config_malloc
|
||||
Name | Optional | Type | Description
|
||||
----------------------- | -------- | ----------- | -----------
|
||||
name | Optional | string | Bdev name to use
|
||||
block_size | Required | number | Block size in bytes -must be multiple of 512
|
||||
block_size | Required | number | Data block size in bytes - must be a multiple of 512
|
||||
num_blocks | Required | number | Number of blocks
|
||||
uuid | Optional | string | UUID of new bdev
|
||||
optimal_io_boundary | Optional | number | Split on optimal IO boundary, in number of blocks, default 0
|
||||
md_size | Optional | number | Metadata size for this bdev (0, 8, 16, 32, 64, or 128). Default is 0.
|
||||
md_interleave | Optional | boolean | Metadata location, interleaved if true, and separated if false. Default is false.
|
||||
|
||||
#### Result
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
struct malloc_disk {
|
||||
struct spdk_bdev disk;
|
||||
void *malloc_buf;
|
||||
void *malloc_md_buf;
|
||||
TAILQ_ENTRY(malloc_disk) link;
|
||||
};
|
||||
|
||||
@ -90,6 +91,7 @@ malloc_disk_free(struct malloc_disk *malloc_disk)
|
||||
|
||||
free(malloc_disk->disk.name);
|
||||
spdk_free(malloc_disk->malloc_buf);
|
||||
spdk_free(malloc_disk->malloc_md_buf);
|
||||
free(malloc_disk);
|
||||
}
|
||||
|
||||
@ -122,10 +124,12 @@ bdev_malloc_check_iov_len(struct iovec *iovs, int iovcnt, size_t nbytes)
|
||||
static void
|
||||
bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
|
||||
struct malloc_task *task,
|
||||
struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
|
||||
struct iovec *iov, int iovcnt, size_t len, uint64_t offset,
|
||||
void *md_buf, size_t md_len, uint64_t md_offset)
|
||||
{
|
||||
int64_t res = 0;
|
||||
void *src = mdisk->malloc_buf + offset;
|
||||
void *src;
|
||||
void *md_src;
|
||||
int i;
|
||||
|
||||
if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
|
||||
@ -134,11 +138,13 @@ bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
|
||||
return;
|
||||
}
|
||||
|
||||
task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
||||
task->num_outstanding = 0;
|
||||
|
||||
SPDK_DEBUGLOG(bdev_malloc, "read %zu bytes from offset %#" PRIx64 ", iovcnt=%d\n",
|
||||
len, offset, iovcnt);
|
||||
|
||||
task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
||||
task->num_outstanding = 0;
|
||||
src = mdisk->malloc_buf + offset;
|
||||
|
||||
for (i = 0; i < iovcnt; i++) {
|
||||
task->num_outstanding++;
|
||||
@ -153,15 +159,34 @@ bdev_malloc_readv(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
|
||||
src += iov[i].iov_len;
|
||||
len -= iov[i].iov_len;
|
||||
}
|
||||
|
||||
if (md_buf == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
SPDK_DEBUGLOG(bdev_malloc, "read metadata %zu bytes from offset%#" PRIx64 "\n",
|
||||
md_len, md_offset);
|
||||
|
||||
md_src = mdisk->malloc_md_buf + md_offset;
|
||||
|
||||
task->num_outstanding++;
|
||||
res = spdk_accel_submit_copy(ch, md_buf, md_src, md_len, 0, malloc_done, task);
|
||||
|
||||
if (res != 0) {
|
||||
malloc_done(task, res);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
|
||||
struct malloc_task *task,
|
||||
struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
|
||||
struct iovec *iov, int iovcnt, size_t len, uint64_t offset,
|
||||
void *md_buf, size_t md_len, uint64_t md_offset)
|
||||
{
|
||||
|
||||
int64_t res = 0;
|
||||
void *dst = mdisk->malloc_buf + offset;
|
||||
void *dst;
|
||||
void *md_dst;
|
||||
int i;
|
||||
|
||||
if (bdev_malloc_check_iov_len(iov, iovcnt, len)) {
|
||||
@ -173,6 +198,8 @@ bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
|
||||
SPDK_DEBUGLOG(bdev_malloc, "wrote %zu bytes to offset %#" PRIx64 ", iovcnt=%d\n",
|
||||
len, offset, iovcnt);
|
||||
|
||||
dst = mdisk->malloc_buf + offset;
|
||||
|
||||
task->status = SPDK_BDEV_IO_STATUS_SUCCESS;
|
||||
task->num_outstanding = 0;
|
||||
|
||||
@ -188,6 +215,22 @@ bdev_malloc_writev(struct malloc_disk *mdisk, struct spdk_io_channel *ch,
|
||||
|
||||
dst += iov[i].iov_len;
|
||||
}
|
||||
|
||||
if (md_buf == NULL) {
|
||||
return;
|
||||
}
|
||||
SPDK_DEBUGLOG(bdev_malloc, "wrote metadata %zu bytes to offset %#" PRIx64 "\n",
|
||||
md_len, md_offset);
|
||||
|
||||
md_dst = mdisk->malloc_md_buf + md_offset;
|
||||
|
||||
task->num_outstanding++;
|
||||
res = spdk_accel_submit_copy(ch, md_dst, md_buf, md_len, 0, malloc_done, task);
|
||||
|
||||
if (res != 0) {
|
||||
malloc_done(task, res);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
@ -208,6 +251,7 @@ static int
|
||||
_bdev_malloc_submit_request(struct malloc_channel *mch, struct spdk_bdev_io *bdev_io)
|
||||
{
|
||||
uint32_t block_size = bdev_io->bdev->blocklen;
|
||||
uint32_t md_size = bdev_io->bdev->md_len;
|
||||
|
||||
switch (bdev_io->type) {
|
||||
case SPDK_BDEV_IO_TYPE_READ:
|
||||
@ -228,7 +272,10 @@ _bdev_malloc_submit_request(struct malloc_channel *mch, struct spdk_bdev_io *bde
|
||||
bdev_io->u.bdev.iovs,
|
||||
bdev_io->u.bdev.iovcnt,
|
||||
bdev_io->u.bdev.num_blocks * block_size,
|
||||
bdev_io->u.bdev.offset_blocks * block_size);
|
||||
bdev_io->u.bdev.offset_blocks * block_size,
|
||||
bdev_io->u.bdev.md_buf,
|
||||
bdev_io->u.bdev.num_blocks * md_size,
|
||||
bdev_io->u.bdev.offset_blocks * md_size);
|
||||
return 0;
|
||||
|
||||
case SPDK_BDEV_IO_TYPE_WRITE:
|
||||
@ -238,7 +285,10 @@ _bdev_malloc_submit_request(struct malloc_channel *mch, struct spdk_bdev_io *bde
|
||||
bdev_io->u.bdev.iovs,
|
||||
bdev_io->u.bdev.iovcnt,
|
||||
bdev_io->u.bdev.num_blocks * block_size,
|
||||
bdev_io->u.bdev.offset_blocks * block_size);
|
||||
bdev_io->u.bdev.offset_blocks * block_size,
|
||||
bdev_io->u.bdev.md_buf,
|
||||
bdev_io->u.bdev.num_blocks * md_size,
|
||||
bdev_io->u.bdev.offset_blocks * md_size);
|
||||
return 0;
|
||||
|
||||
case SPDK_BDEV_IO_TYPE_RESET:
|
||||
@ -359,7 +409,8 @@ static const struct spdk_bdev_fn_table malloc_fn_table = {
|
||||
int
|
||||
create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
|
||||
{
|
||||
struct malloc_disk *mdisk;
|
||||
struct malloc_disk *mdisk;
|
||||
uint32_t block_size;
|
||||
int rc;
|
||||
|
||||
assert(opts != NULL);
|
||||
@ -370,10 +421,29 @@ create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
|
||||
}
|
||||
|
||||
if (opts->block_size % 512) {
|
||||
SPDK_ERRLOG("block size must be 512 bytes aligned\n");
|
||||
SPDK_ERRLOG("Data block size must be 512 bytes aligned\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (opts->md_size) {
|
||||
case 0:
|
||||
case 8:
|
||||
case 16:
|
||||
case 32:
|
||||
case 64:
|
||||
case 128:
|
||||
break;
|
||||
default:
|
||||
SPDK_ERRLOG("metadata size %u is not supported\n", opts->md_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (opts->md_interleave) {
|
||||
block_size = opts->block_size + opts->md_size;
|
||||
} else {
|
||||
block_size = opts->block_size;
|
||||
}
|
||||
|
||||
mdisk = calloc(1, sizeof(*mdisk));
|
||||
if (!mdisk) {
|
||||
SPDK_ERRLOG("mdisk calloc() failed\n");
|
||||
@ -386,7 +456,7 @@ create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
|
||||
* TODO: need to pass a hint so we know which socket to allocate
|
||||
* from on multi-socket systems.
|
||||
*/
|
||||
mdisk->malloc_buf = spdk_zmalloc(opts->num_blocks * opts->block_size, 2 * 1024 * 1024, NULL,
|
||||
mdisk->malloc_buf = spdk_zmalloc(opts->num_blocks * block_size, 2 * 1024 * 1024, NULL,
|
||||
SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
|
||||
if (!mdisk->malloc_buf) {
|
||||
SPDK_ERRLOG("malloc_buf spdk_zmalloc() failed\n");
|
||||
@ -394,6 +464,16 @@ create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (!opts->md_interleave && opts->md_size != 0) {
|
||||
mdisk->malloc_md_buf = spdk_zmalloc(opts->num_blocks * opts->md_size, 2 * 1024 * 1024, NULL,
|
||||
SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
|
||||
if (!mdisk->malloc_md_buf) {
|
||||
SPDK_ERRLOG("malloc_md_buf spdk_zmalloc() failed\n");
|
||||
malloc_disk_free(mdisk);
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
if (opts->name) {
|
||||
mdisk->disk.name = strdup(opts->name);
|
||||
} else {
|
||||
@ -408,8 +488,10 @@ create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
|
||||
mdisk->disk.product_name = "Malloc disk";
|
||||
|
||||
mdisk->disk.write_cache = 1;
|
||||
mdisk->disk.blocklen = opts->block_size;
|
||||
mdisk->disk.blocklen = block_size;
|
||||
mdisk->disk.blockcnt = opts->num_blocks;
|
||||
mdisk->disk.md_len = opts->md_size;
|
||||
mdisk->disk.md_interleave = opts->md_interleave;
|
||||
if (opts->optimal_io_boundary) {
|
||||
mdisk->disk.optimal_io_boundary = opts->optimal_io_boundary;
|
||||
mdisk->disk.split_on_optimal_io_boundary = true;
|
||||
|
@ -19,6 +19,8 @@ struct malloc_bdev_opts {
|
||||
uint64_t num_blocks;
|
||||
uint32_t block_size;
|
||||
uint32_t optimal_io_boundary;
|
||||
uint32_t md_size;
|
||||
bool md_interleave;
|
||||
};
|
||||
|
||||
int create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts);
|
||||
|
@ -36,6 +36,8 @@ static const struct spdk_json_object_decoder rpc_construct_malloc_decoders[] = {
|
||||
{"num_blocks", offsetof(struct malloc_bdev_opts, num_blocks), spdk_json_decode_uint64},
|
||||
{"block_size", offsetof(struct malloc_bdev_opts, block_size), spdk_json_decode_uint32},
|
||||
{"optimal_io_boundary", offsetof(struct malloc_bdev_opts, optimal_io_boundary), spdk_json_decode_uint32, true},
|
||||
{"md_size", offsetof(struct malloc_bdev_opts, md_size), spdk_json_decode_uint32, true},
|
||||
{"md_interleave", offsetof(struct malloc_bdev_opts, md_interleave), spdk_json_decode_bool, true},
|
||||
};
|
||||
|
||||
static void
|
||||
|
@ -234,15 +234,18 @@ def bdev_ocf_set_seqcutoff(client, name, policy, threshold, promotion_count):
|
||||
return client.call('bdev_ocf_set_seqcutoff', params)
|
||||
|
||||
|
||||
def bdev_malloc_create(client, num_blocks, block_size, name=None, uuid=None, optimal_io_boundary=None):
|
||||
def bdev_malloc_create(client, num_blocks, block_size, name=None, uuid=None, optimal_io_boundary=None,
|
||||
md_size=None, md_interleave=None):
|
||||
"""Construct a malloc block device.
|
||||
|
||||
Args:
|
||||
num_blocks: size of block device in blocks
|
||||
block_size: block size of device; must be a power of 2 and at least 512
|
||||
block_size: Data block size of device; must be a power of 2 and at least 512
|
||||
name: name of block device (optional)
|
||||
uuid: UUID of block device (optional)
|
||||
optimal_io_boundary: Split on optimal IO boundary, in number of blocks, default 0 (disabled, optional)
|
||||
md_size: metadata size of device (0, 8, 16, 32, 64, or 128), default 0 (optional)
|
||||
md_interleave: metadata location, interleaved if set, and separated if omitted (optional)
|
||||
|
||||
Returns:
|
||||
Name of created block device.
|
||||
@ -254,6 +257,11 @@ def bdev_malloc_create(client, num_blocks, block_size, name=None, uuid=None, opt
|
||||
params['uuid'] = uuid
|
||||
if optimal_io_boundary:
|
||||
params['optimal_io_boundary'] = optimal_io_boundary
|
||||
if md_size:
|
||||
params['md_size'] = md_size
|
||||
if md_interleave:
|
||||
params['md_interleave'] = md_interleave
|
||||
|
||||
return client.call('bdev_malloc_create', params)
|
||||
|
||||
|
||||
|
@ -353,15 +353,21 @@ if __name__ == "__main__":
|
||||
block_size=args.block_size,
|
||||
name=args.name,
|
||||
uuid=args.uuid,
|
||||
optimal_io_boundary=args.optimal_io_boundary))
|
||||
optimal_io_boundary=args.optimal_io_boundary,
|
||||
md_size=args.md_size,
|
||||
md_interleave=args.md_interleave))
|
||||
p = subparsers.add_parser('bdev_malloc_create', help='Create a bdev with malloc backend')
|
||||
p.add_argument('-b', '--name', help="Name of the bdev")
|
||||
p.add_argument('-u', '--uuid', help="UUID of the bdev")
|
||||
p.add_argument(
|
||||
'total_size', help='Size of malloc bdev in MB (float > 0)', type=float)
|
||||
p.add_argument('block_size', help='Block size for this bdev', type=int)
|
||||
p.add_argument('block_size', help='Data block size for this bdev', type=int)
|
||||
p.add_argument('-o', '--optimal-io-boundary', help="""Split on optimal IO boundary, in number of
|
||||
blocks, default 0 (disabled)""", type=int)
|
||||
p.add_argument('-m', '--md-size', type=int,
|
||||
help='Metadata size for this bdev (0, 8, 16, 32, 64, or 128). Default is 0.')
|
||||
p.add_argument('-i', '--md-interleave', action='store_true',
|
||||
help='Metadata location, interleaved if set, and separated if omitted.')
|
||||
p.set_defaults(func=bdev_malloc_create)
|
||||
|
||||
def bdev_malloc_delete(args):
|
||||
|
Loading…
Reference in New Issue
Block a user