bdev/malloc: Support protection information for read and write
For write, verify DIF/DIX before submission and for read, verify DIF/DIX after successful completion. As with the NVMe bdev module and the NULL bdev module, DIF/DIX verification is done based on the DIF type and DIF insert/strip is not supported. In the near future, the bdev I/O APIs will bring an I/O flag to the underlying bdev and the malloc bdev module will be able to decide DIF/DIX verification based on the I/O flag. One important feature is to set up protection information when creating a malloc disk. Otherwise, all initial reads will fail if protection information is enabled. For users, add some explanation about the dif_type parameter to doc/jsonrpc.md. Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com> Change-Id: I93757b77c03cade766c872e418bb46d44918bee2 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14985 Reviewed-by: Changpeng Liu <changpeng.liu@intel.com> Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Community-CI: Mellanox Build Bot
This commit is contained in:
parent
aef00d4420
commit
00bff560dd
@ -6,6 +6,8 @@
|
||||
|
||||
Both interleaved and separated metadata are now supported by the malloc bdev module.
|
||||
|
||||
Protection information is now supported by the malloc bdev module.
|
||||
|
||||
### scheduler
|
||||
|
||||
Changing scheduler from dynamic back to static is no longer possible,
|
||||
|
@ -2785,6 +2785,15 @@ Example response:
|
||||
|
||||
Construct @ref bdev_config_malloc
|
||||
|
||||
The `dif_type` parameter can be 0, 1, 2, or 3, and controls the check of the guard tag and the reference tag.
|
||||
If the `dif_type` is 1, 2, or 3, the malloc bdev compares the guard tag to the CRC-16 computed over the block data.
|
||||
If the `dif_type` is 1 or 2, the malloc bdev compares the reference tag to the computed reference tag.
|
||||
The computed reference tag for the first block of the I/O is the `init_ref_tag` of the DIF context, and
|
||||
the computed reference tag is incremented for each subsequent block.
|
||||
If the `dif_type` is 3, the malloc bdev does not check the reference tag.
|
||||
The application tag is not checked by the malloc bdev because the current block device API does not expose
|
||||
it to the upper layer yet.
|
||||
|
||||
#### Parameters
|
||||
|
||||
Name | Optional | Type | Description
|
||||
@ -2796,6 +2805,8 @@ uuid | Optional | string | UUID of new bdev
|
||||
optimal_io_boundary | Optional | number | Split on optimal IO boundary, in number of blocks, default 0
|
||||
md_size | Optional | number | Metadata size for this bdev (0, 8, 16, 32, 64, or 128). Default is 0.
|
||||
md_interleave | Optional | boolean | Metadata location, interleaved if true, and separated if false. Default is false.
|
||||
dif_type | Optional | number | Protection information type. Parameter md_size needs to be set along with dif_type. Default=0 - no protection.
|
||||
dif_is_head_of_md | Optional | boolean | Protection information is in the first 8 bytes of metadata. Default=false.
|
||||
|
||||
#### Result
|
||||
|
||||
|
@ -33,10 +33,68 @@ struct malloc_channel {
|
||||
TAILQ_HEAD(, malloc_task) completed_tasks;
|
||||
};
|
||||
|
||||
static int
|
||||
malloc_verify_pi(struct spdk_bdev_io *bdev_io)
|
||||
{
|
||||
struct spdk_bdev *bdev = bdev_io->bdev;
|
||||
struct spdk_dif_ctx dif_ctx;
|
||||
struct spdk_dif_error err_blk;
|
||||
int rc;
|
||||
|
||||
rc = spdk_dif_ctx_init(&dif_ctx,
|
||||
bdev->blocklen,
|
||||
bdev->md_len,
|
||||
bdev->md_interleave,
|
||||
bdev->dif_is_head_of_md,
|
||||
bdev->dif_type,
|
||||
bdev->dif_check_flags,
|
||||
bdev_io->u.bdev.offset_blocks & 0xFFFFFFFF,
|
||||
0xFFFF, 0, 0, 0);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("Failed to initialize DIF/DIX context\n");
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (spdk_bdev_is_md_interleaved(bdev)) {
|
||||
rc = spdk_dif_verify(bdev_io->u.bdev.iovs,
|
||||
bdev_io->u.bdev.iovcnt,
|
||||
bdev_io->u.bdev.num_blocks,
|
||||
&dif_ctx,
|
||||
&err_blk);
|
||||
} else {
|
||||
struct iovec md_iov = {
|
||||
.iov_base = bdev_io->u.bdev.md_buf,
|
||||
.iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
|
||||
};
|
||||
|
||||
rc = spdk_dix_verify(bdev_io->u.bdev.iovs,
|
||||
bdev_io->u.bdev.iovcnt,
|
||||
&md_iov,
|
||||
bdev_io->u.bdev.num_blocks,
|
||||
&dif_ctx,
|
||||
&err_blk);
|
||||
}
|
||||
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("DIF/DIX verify failed: lba %" PRIu64 ", num_blocks %" PRIu64 ", "
|
||||
"err_type %u, expected %u, actual %u, err_offset %u\n",
|
||||
bdev_io->u.bdev.offset_blocks,
|
||||
bdev_io->u.bdev.num_blocks,
|
||||
err_blk.err_type,
|
||||
err_blk.expected,
|
||||
err_blk.actual,
|
||||
err_blk.err_offset);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void
|
||||
malloc_done(void *ref, int status)
|
||||
{
|
||||
struct malloc_task *task = (struct malloc_task *)ref;
|
||||
struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(task);
|
||||
int rc;
|
||||
|
||||
if (status != 0) {
|
||||
if (status == -ENOMEM) {
|
||||
@ -46,9 +104,20 @@ malloc_done(void *ref, int status)
|
||||
}
|
||||
}
|
||||
|
||||
if (--task->num_outstanding == 0) {
|
||||
spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
|
||||
if (--task->num_outstanding != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (bdev_io->bdev->dif_type != SPDK_DIF_DISABLE &&
|
||||
bdev_io->type == SPDK_BDEV_IO_TYPE_READ &&
|
||||
task->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
|
||||
rc = malloc_verify_pi(bdev_io);
|
||||
if (rc != 0) {
|
||||
task->status = SPDK_BDEV_IO_STATUS_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), task->status);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -252,6 +321,7 @@ _bdev_malloc_submit_request(struct malloc_channel *mch, struct spdk_bdev_io *bde
|
||||
{
|
||||
uint32_t block_size = bdev_io->bdev->blocklen;
|
||||
uint32_t md_size = bdev_io->bdev->md_len;
|
||||
int rc;
|
||||
|
||||
switch (bdev_io->type) {
|
||||
case SPDK_BDEV_IO_TYPE_READ:
|
||||
@ -279,6 +349,15 @@ _bdev_malloc_submit_request(struct malloc_channel *mch, struct spdk_bdev_io *bde
|
||||
return 0;
|
||||
|
||||
case SPDK_BDEV_IO_TYPE_WRITE:
|
||||
if (bdev_io->bdev->dif_type != SPDK_DIF_DISABLE) {
|
||||
rc = malloc_verify_pi(bdev_io);
|
||||
if (rc != 0) {
|
||||
malloc_complete_task((struct malloc_task *)bdev_io->driver_ctx, mch,
|
||||
SPDK_BDEV_IO_STATUS_FAILED);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
bdev_malloc_writev((struct malloc_disk *)bdev_io->bdev->ctxt,
|
||||
mch->accel_channel,
|
||||
(struct malloc_task *)bdev_io->driver_ctx,
|
||||
@ -406,6 +485,47 @@ static const struct spdk_bdev_fn_table malloc_fn_table = {
|
||||
.write_config_json = bdev_malloc_write_json_config,
|
||||
};
|
||||
|
||||
static int
|
||||
malloc_disk_setup_pi(struct malloc_disk *mdisk)
|
||||
{
|
||||
struct spdk_bdev *bdev = &mdisk->disk;
|
||||
struct spdk_dif_ctx dif_ctx;
|
||||
struct iovec iov, md_iov;
|
||||
int rc;
|
||||
|
||||
rc = spdk_dif_ctx_init(&dif_ctx,
|
||||
bdev->blocklen,
|
||||
bdev->md_len,
|
||||
bdev->md_interleave,
|
||||
bdev->dif_is_head_of_md,
|
||||
bdev->dif_type,
|
||||
bdev->dif_check_flags,
|
||||
0, /* configure the whole buffers */
|
||||
0, 0, 0, 0);
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("Initialization of DIF/DIX context failed\n");
|
||||
return rc;
|
||||
}
|
||||
|
||||
iov.iov_base = mdisk->malloc_buf;
|
||||
iov.iov_len = bdev->blockcnt * bdev->blocklen;
|
||||
|
||||
if (mdisk->disk.md_interleave) {
|
||||
rc = spdk_dif_generate(&iov, 1, bdev->blockcnt, &dif_ctx);
|
||||
} else {
|
||||
md_iov.iov_base = mdisk->malloc_md_buf;
|
||||
md_iov.iov_len = bdev->blockcnt * bdev->md_len;
|
||||
|
||||
rc = spdk_dix_generate(&iov, 1, &md_iov, bdev->blockcnt, &dif_ctx);
|
||||
}
|
||||
|
||||
if (rc != 0) {
|
||||
SPDK_ERRLOG("Formatting by DIF/DIX failed\n");
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int
|
||||
create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
|
||||
{
|
||||
@ -444,6 +564,16 @@ create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
|
||||
block_size = opts->block_size;
|
||||
}
|
||||
|
||||
if (opts->dif_type < SPDK_DIF_DISABLE || opts->dif_type > SPDK_DIF_TYPE3) {
|
||||
SPDK_ERRLOG("DIF type is invalid\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (opts->dif_type != SPDK_DIF_DISABLE && opts->md_size == 0) {
|
||||
SPDK_ERRLOG("Metadata size should not be zero if DIF is enabled\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mdisk = calloc(1, sizeof(*mdisk));
|
||||
if (!mdisk) {
|
||||
SPDK_ERRLOG("mdisk calloc() failed\n");
|
||||
@ -492,6 +622,34 @@ create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts)
|
||||
mdisk->disk.blockcnt = opts->num_blocks;
|
||||
mdisk->disk.md_len = opts->md_size;
|
||||
mdisk->disk.md_interleave = opts->md_interleave;
|
||||
mdisk->disk.dif_type = opts->dif_type;
|
||||
mdisk->disk.dif_is_head_of_md = opts->dif_is_head_of_md;
|
||||
/* Current block device layer API does not propagate
|
||||
* any DIF related information from user. So, we can
|
||||
* not generate or verify Application Tag.
|
||||
*/
|
||||
switch (opts->dif_type) {
|
||||
case SPDK_DIF_TYPE1:
|
||||
case SPDK_DIF_TYPE2:
|
||||
mdisk->disk.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK |
|
||||
SPDK_DIF_FLAGS_REFTAG_CHECK;
|
||||
break;
|
||||
case SPDK_DIF_TYPE3:
|
||||
mdisk->disk.dif_check_flags = SPDK_DIF_FLAGS_GUARD_CHECK;
|
||||
break;
|
||||
case SPDK_DIF_DISABLE:
|
||||
break;
|
||||
}
|
||||
|
||||
if (opts->dif_type != SPDK_DIF_DISABLE) {
|
||||
rc = malloc_disk_setup_pi(mdisk);
|
||||
if (rc) {
|
||||
SPDK_ERRLOG("Failed to set up protection information.\n");
|
||||
malloc_disk_free(mdisk);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
if (opts->optimal_io_boundary) {
|
||||
mdisk->disk.optimal_io_boundary = opts->optimal_io_boundary;
|
||||
mdisk->disk.split_on_optimal_io_boundary = true;
|
||||
|
@ -21,6 +21,8 @@ struct malloc_bdev_opts {
|
||||
uint32_t optimal_io_boundary;
|
||||
uint32_t md_size;
|
||||
bool md_interleave;
|
||||
enum spdk_dif_type dif_type;
|
||||
bool dif_is_head_of_md;
|
||||
};
|
||||
|
||||
int create_malloc_disk(struct spdk_bdev **bdev, const struct malloc_bdev_opts *opts);
|
||||
|
@ -38,6 +38,8 @@ static const struct spdk_json_object_decoder rpc_construct_malloc_decoders[] = {
|
||||
{"optimal_io_boundary", offsetof(struct malloc_bdev_opts, optimal_io_boundary), spdk_json_decode_uint32, true},
|
||||
{"md_size", offsetof(struct malloc_bdev_opts, md_size), spdk_json_decode_uint32, true},
|
||||
{"md_interleave", offsetof(struct malloc_bdev_opts, md_interleave), spdk_json_decode_bool, true},
|
||||
{"dif_type", offsetof(struct malloc_bdev_opts, dif_type), spdk_json_decode_int32, true},
|
||||
{"dif_is_head_of_md", offsetof(struct malloc_bdev_opts, dif_is_head_of_md), spdk_json_decode_bool, true},
|
||||
};
|
||||
|
||||
static void
|
||||
|
@ -235,7 +235,7 @@ def bdev_ocf_set_seqcutoff(client, name, policy, threshold, promotion_count):
|
||||
|
||||
|
||||
def bdev_malloc_create(client, num_blocks, block_size, name=None, uuid=None, optimal_io_boundary=None,
|
||||
md_size=None, md_interleave=None):
|
||||
md_size=None, md_interleave=None, dif_type=None, dif_is_head_of_md=None):
|
||||
"""Construct a malloc block device.
|
||||
|
||||
Args:
|
||||
@ -246,6 +246,8 @@ def bdev_malloc_create(client, num_blocks, block_size, name=None, uuid=None, opt
|
||||
optimal_io_boundary: Split on optimal IO boundary, in number of blocks, default 0 (disabled, optional)
|
||||
md_size: metadata size of device (0, 8, 16, 32, 64, or 128), default 0 (optional)
|
||||
md_interleave: metadata location, interleaved if set, and separated if omitted (optional)
|
||||
dif_type: protection information type (optional)
|
||||
dif_is_head_of_md: protection information is in the first 8 bytes of metadata (optional)
|
||||
|
||||
Returns:
|
||||
Name of created block device.
|
||||
@ -261,6 +263,10 @@ def bdev_malloc_create(client, num_blocks, block_size, name=None, uuid=None, opt
|
||||
params['md_size'] = md_size
|
||||
if md_interleave:
|
||||
params['md_interleave'] = md_interleave
|
||||
if dif_type:
|
||||
params['dif_type'] = dif_type
|
||||
if dif_is_head_of_md:
|
||||
params['dif_is_head_of_md'] = dif_is_head_of_md
|
||||
|
||||
return client.call('bdev_malloc_create', params)
|
||||
|
||||
|
@ -355,7 +355,9 @@ if __name__ == "__main__":
|
||||
uuid=args.uuid,
|
||||
optimal_io_boundary=args.optimal_io_boundary,
|
||||
md_size=args.md_size,
|
||||
md_interleave=args.md_interleave))
|
||||
md_interleave=args.md_interleave,
|
||||
dif_type=args.dif_type,
|
||||
dif_is_head_of_md=args.dif_is_head_of_md))
|
||||
p = subparsers.add_parser('bdev_malloc_create', help='Create a bdev with malloc backend')
|
||||
p.add_argument('-b', '--name', help="Name of the bdev")
|
||||
p.add_argument('-u', '--uuid', help="UUID of the bdev")
|
||||
@ -368,6 +370,11 @@ if __name__ == "__main__":
|
||||
help='Metadata size for this bdev (0, 8, 16, 32, 64, or 128). Default is 0.')
|
||||
p.add_argument('-i', '--md-interleave', action='store_true',
|
||||
help='Metadata location, interleaved if set, and separated if omitted.')
|
||||
# Adjacent string literals are concatenated verbatim, so the first fragment
# must end with a space to keep "needs" and "to" apart in the help output.
p.add_argument('-t', '--dif-type', type=int, choices=[0, 1, 2, 3],
               help='Protection information type. Parameter --md-size needs '
               'to be set along --dif-type. Default=0 - no protection.')
|
||||
p.add_argument('-d', '--dif-is-head-of-md', action='store_true',
|
||||
help='Protection information is in the first 8 bytes of metadata. Default=false.')
|
||||
p.set_defaults(func=bdev_malloc_create)
|
||||
|
||||
def bdev_malloc_delete(args):
|
||||
|
Loading…
Reference in New Issue
Block a user