bdev/malloc: Support protection information for read and write

For write, verify DIF/DIX before submission and for read, verify
DIF/DIX after successful completion.

As with the NVMe bdev module and the null bdev module, DIF/DIX
verification is done based on the DIF type, and DIF insert/strip is
not supported.

In the near future, the bdev I/O APIs will pass an I/O flag to the
underlying bdev, and the malloc bdev module will be able to decide on
DIF/DIX verification based on that flag.

One important feature is to set up protection information when
creating a malloc disk. Otherwise, all initial reads would fail
if protection information is enabled.

For users, add some explanation about the dif_type parameter
into doc/jsonrpc.md.

Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: I93757b77c03cade766c872e418bb46d44918bee2
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14985
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
This commit is contained in:
Shuhei Matsumoto 2022-10-26 14:58:52 +09:00 committed by Tomasz Zawadzki
parent aef00d4420
commit 00bff560dd
7 changed files with 192 additions and 4 deletions

View File

@ -6,6 +6,8 @@
Both interleaved and separated metadata are now supported by the malloc bdev module.
Protection information is now supported by the malloc bdev module.
### scheduler
Changing scheduler from dynamic back to static is no longer possible,

View File

@ -2785,6 +2785,15 @@ Example response:
Construct @ref bdev_config_malloc
The `dif_type` parameter can be 0, 1, 2, or 3, and controls the checking of the guard tag and the reference tag.
If the `dif_type` is 1, 2, or 3, the malloc bdev compares the guard tag to the CRC-16 computed over the block data.
If the `dif_type` is 1 or 2, the malloc bdev compares the reference tag to the computed reference tag.
The computed reference tag for the first block of the I/O is the `init_ref_tag` of the DIF context, and
the computed reference tag is incremented for each subsequent block.
If the `dif_type` is 3, the malloc bdev does not check the reference tag.
The application tag is not checked by the malloc bdev because the current block device API does not expose
it to the upper layer yet.
#### Parameters
Name | Optional | Type | Description
@ -2796,6 +2805,8 @@ uuid | Optional | string | UUID of new bdev
optimal_io_boundary | Optional | number | Split on optimal IO boundary, in number of blocks, default 0
md_size | Optional | number | Metadata size for this bdev (0, 8, 16, 32, 64, or 128). Default is 0.
md_interleave | Optional | boolean | Metadata location, interleaved if true, and separated if false. Default is false.
dif_type | Optional | number | Protection information type. Parameter --md-size needs to be set along with --dif-type. Default=0 - no protection.
dif_is_head_of_md | Optional | boolean | Protection information is in the first 8 bytes of metadata. Default=false.
#### Result

View File

@ -33,10 +33,68 @@ struct malloc_channel {
TAILQ_HEAD(, malloc_task) completed_tasks;
};
static int
malloc_verify_pi(struct spdk_bdev_io *bdev_io)
{
struct spdk_bdev *bdev = bdev_io->bdev;
struct spdk_dif_ctx dif_ctx;
struct spdk_dif_error err_blk;
int rc;
rc = spdk_dif_ctx_init(&dif_ctx,
bdev->blocklen,
bdev->md_len,
bdev->md_interleave,
bdev->dif_is_head_of_md,
bdev->dif_type,
bdev->dif_check_flags,
bdev_io->u.bdev.offset_blocks & 0xFFFFFFFF,
0xFFFF, 0, 0, 0);
if (rc != 0) {
SPDK_ERRLOG("Failed to initialize DIF/DIX context\n");
return rc;
}
if (spdk_bdev_is_md_interleaved(bdev)) {
rc = spdk_dif_verify(bdev_io->u.bdev.iovs,
bdev_io->u.bdev.iovcnt,
bdev_io->u.bdev.num_blocks,
&dif_ctx,
&err_blk);
} else {
struct iovec md_iov = {
.iov_base = bdev_io->u.bdev.md_buf,
.iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
};
rc = spdk_dix_verify(bdev_io->u.bdev.iovs,
bdev_io->u.bdev.iovcnt,
&md_iov,
bdev_io->u.bdev.num_blocks,
&dif_ctx,
&err_blk);
}
if (rc != 0) {
SPDK_ERRLOG("DIF/DIX verify failed: lba %" PRIu64 ", num_blocks %" PRIu64 ", "
"err_type %u, expected %u, actual %u, err_offset %u\n",
bdev_io->u.bdev.offset_blocks,
bdev_io->u.bdev.num_blocks,
err_blk.err_type,
err_blk.expected,
err_blk.actual,
err_blk.err_offset);
}
return rc;
}
static void
malloc_done(void *ref, int status)
{
struct malloc_task *task = (struct malloc_task *)ref;
struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(task);
int rc;
if (status != 0) {
if (status == -ENOMEM) {
@ -46,9 +104,20 @@ malloc_done(void *ref, int status)
}
}
if (--task->num_outstanding == 0) {
spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
if (--task->num_outstanding != 0) {
return;
}
if (bdev_io->bdev->dif_type != SPDK_DIF_DISABLE &&
bdev_io->type == SPDK_BDEV_IO_TYPE_READ &&
task->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
rc = malloc_verify_pi(bdev_io);
if (rc != 0) {
task->status = SPDK_BDEV_IO_STATUS_FAILED;
}
}
spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
}
static void
@ -252,6 +321,7 @@ _bdev_malloc_submit_request(struct malloc_channel *mch, struct spdk_bdev_io *bde
{
uint32_t block_size = bdev_io->bdev->blocklen;
uint32_t md_size = bdev_io->bdev->md_len;
int rc;
switch (bdev_io->type) {
case SPDK_BDEV_IO_TYPE_READ:
@ -279,6 +349,15 @@ _bdev_malloc_submit_request(struct malloc_channel *mch, struct spdk_bdev_io *bde
return 0;
case SPDK_BDEV_IO_TYPE_WRITE:
if (bdev_io->bdev->dif_type != SPDK_DIF_DISABLE) {
rc = malloc_verify_pi(bdev_io);
if (rc != 0) {
malloc_complete_task((struct malloc_task *)bdev_io->driver_ctx, mch,
SPDK_BDEV_IO_STATUS_FAILED);
return 0;
}
}
bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt,
mch->accel_channel,
(struct malloc_task *)bdev_io->driver_ctx,
@ -406,6 +485,47 @@ static const struct spdk_bdev_fn_table malloc_fn_table = {
.write_config_json = bdev_malloc_write_json_config,
};
static int
malloc_disk_setup_pi(struct malloc_disk *mdisk)
{
struct spdk_bdev *bdev = &mdisk->disk;
struct spdk_dif_ctx dif_ctx;
struct iovec iov, md_iov;
int rc;
rc = spdk_dif_ctx_init(&dif_ctx,
bdev->blocklen,
bdev->md_len,
bdev->md_interleave,
bdev->dif_is_head_of_md,
bdev->dif_type,
bdev->dif_check_flags,
0, /* configure the whole buffers */
0, 0, 0, 0);
if (rc != 0) {
SPDK_ERRLOG("Initialization of DIF/DIX context failed\n");
return rc;
}
iov.iov_base = mdisk->malloc_buf;
iov.iov_len = bdev->blockcnt * bdev->blocklen;
if (mdisk->disk.md_interleave) {
rc = spdk_dif_generate(&iov, 1, bdev->blockcnt, &dif_ctx);
} else {
md_iov.iov_base = mdisk->malloc_md_buf;
md_iov.iov_len = bdev->blockcnt * bdev->md_len;
rc = spdk_dix_generate(&iov, 1, &md_iov, bdev->blockcnt, &dif_ctx);
}
if (rc != 0) {
SPDK_ERRLOG("Formatting by DIF/DIX failed\n");
}
return rc;
}
int
create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
{
@ -444,6 +564,16 @@ create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
block_size = opts->block_size;
}
if (opts->dif_type < SPDK_DIF_DISABLE || opts->dif_type > SPDK_DIF_TYPE3) {
SPDK_ERRLOG("DIF type is invalid\n");
return -EINVAL;
}
if (opts->dif_type != SPDK_DIF_DISABLE && opts->md_size == 0) {
SPDK_ERRLOG("Metadata size should not be zero if DIF is enabled\n");
return -EINVAL;
}
mdisk = calloc(1, sizeof(*mdisk));
if (!mdisk) {
SPDK_ERRLOG("mdisk calloc() failed\n");
@ -492,6 +622,34 @@ create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
mdisk->disk.blockcnt = opts->num_blocks;
mdisk->disk.md_len = opts->md_size;
mdisk->disk.md_interleave = opts->md_interleave;
mdisk->disk.dif_type = opts->dif_type;
mdisk->disk.dif_is_head_of_md = opts->dif_is_head_of_md;
/* Current block device layer API does not propagate
* any DIF related information from user. So, we can
* not generate or verify Application Tag.
*/
switch (opts->dif_type) {
case SPDK_DIF_TYPE1:
case SPDK_DIF_TYPE2:
mdisk->disk.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK |
SPDK_DIF_FLAGS_REFTAG_CHECK;
break;
case SPDK_DIF_TYPE3:
mdisk->disk.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK;
break;
case SPDK_DIF_DISABLE:
break;
}
if (opts->dif_type != SPDK_DIF_DISABLE) {
rc = malloc_disk_setup_pi(mdisk);
if (rc) {
SPDK_ERRLOG("Failed to set up protection information.\n");
malloc_disk_free(mdisk);
return rc;
}
}
if (opts->optimal_io_boundary) {
mdisk->disk.optimal_io_boundary = opts->optimal_io_boundary;
mdisk->disk.split_on_optimal_io_boundary = true;

View File

@ -21,6 +21,8 @@ struct malloc_bdev_opts {
uint32_t optimal_io_boundary;
uint32_t md_size;
bool md_interleave;
enum spdk_dif_type dif_type;
bool dif_is_head_of_md;
};
int create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts);

View File

@ -38,6 +38,8 @@ static const struct spdk_json_object_decoder rpc_construct_malloc_decoders[] = {
{"optimal_io_boundary", offsetof(struct malloc_bdev_opts, optimal_io_boundary), spdk_json_decode_uint32, true},
{"md_size", offsetof(struct malloc_bdev_opts, md_size), spdk_json_decode_uint32, true},
{"md_interleave", offsetof(struct malloc_bdev_opts, md_interleave), spdk_json_decode_bool, true},
{"dif_type", offsetof(struct malloc_bdev_opts, dif_type), spdk_json_decode_int32, true},
{"dif_is_head_of_md", offsetof(struct malloc_bdev_opts, dif_is_head_of_md), spdk_json_decode_bool, true},
};
static void

View File

@ -235,7 +235,7 @@ def bdev_ocf_set_seqcutoff(client, name, policy, threshold, promotion_count):
def bdev_malloc_create(client, num_blocks, block_size, name=None, uuid=None, optimal_io_boundary=None,
md_size=None, md_interleave=None):
md_size=None, md_interleave=None, dif_type=None, dif_is_head_of_md=None):
"""Construct a malloc block device.
Args:
@ -246,6 +246,8 @@ def bdev_malloc_create(client, num_blocks, block_size, name=None, uuid=None, opt
optimal_io_boundary: Split on optimal IO boundary, in number of blocks, default 0 (disabled, optional)
md_size: metadata size of device (0, 8, 16, 32, 64, or 128), default 0 (optional)
md_interleave: metadata location, interleaved if set, and separated if omitted (optional)
dif_type: protection information type (optional)
dif_is_head_of_md: protection information is in the first 8 bytes of metadata (optional)
Returns:
Name of created block device.
@ -261,6 +263,10 @@ def bdev_malloc_create(client, num_blocks, block_size, name=None, uuid=None, opt
params['md_size'] = md_size
if md_interleave:
params['md_interleave'] = md_interleave
if dif_type:
params['dif_type'] = dif_type
if dif_is_head_of_md:
params['dif_is_head_of_md'] = dif_is_head_of_md
return client.call('bdev_malloc_create', params)

View File

@ -355,7 +355,9 @@ if __name__ == "__main__":
uuid=args.uuid,
optimal_io_boundary=args.optimal_io_boundary,
md_size=args.md_size,
md_interleave=args.md_interleave))
md_interleave=args.md_interleave,
dif_type=args.dif_type,
dif_is_head_of_md=args.dif_is_head_of_md))
p = subparsers.add_parser('bdev_malloc_create', help='Create a bdev with malloc backend')
p.add_argument('-b', '--name', help="Name of the bdev")
p.add_argument('-u', '--uuid', help="UUID of the bdev")
@ -368,6 +370,11 @@ if __name__ == "__main__":
help='Metadata size for this bdev (0, 8, 16, 32, 64, or 128). Default is 0.')
p.add_argument('-i', '--md-interleave', action='store_true',
help='Metadata location, interleaved if set, and separated if omitted.')
# Adjacent string literals are concatenated verbatim, so the first fragment
# must end with a space to keep "needs to be set" readable in --help output.
p.add_argument('-t', '--dif-type', type=int, choices=[0, 1, 2, 3],
               help='Protection information type. Parameter --md-size needs '
                    'to be set along --dif-type. Default=0 - no protection.')
p.add_argument('-d', '--dif-is-head-of-md', action='store_true',
help='Protection information is in the first 8 bytes of metadata. Default=false.')
p.set_defaults(func=bdev_malloc_create)
def bdev_malloc_delete(args):