bdev/uring: Add support for zoned io in uring bdev.
Enables the use of uring bdev with ZNS devices. Uses BLKXXXZONE ioctls for implementing the zone operations. Signed-off-by: Indraneel M <Indraneel.Mukherjee@wdc.com> Change-Id: I440e316138182e25d89eb7224932e19bef9a005f Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13550 Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com>
This commit is contained in:
parent
2d37b82e6b
commit
8b8401959e
3
CONFIG
3
CONFIG
@ -161,6 +161,9 @@ CONFIG_ISAL=y
|
||||
# Build with IO_URING support
|
||||
CONFIG_URING=n
|
||||
|
||||
# Build IO_URING bdev with ZNS support
|
||||
CONFIG_URING_ZNS=n
|
||||
|
||||
# Path to custom built IO_URING library
|
||||
CONFIG_URING_PATH=
|
||||
|
||||
|
16
configure
vendored
16
configure
vendored
@ -97,6 +97,7 @@ function usage() {
|
||||
echo " --without-uring If an argument is provided, it is considered a directory containing"
|
||||
echo " liburing.a and io_uring.h. Otherwise the regular system paths will"
|
||||
echo " be searched."
|
||||
echo " --with-uring-zns Build I/O uring module with ZNS (zoned namespaces) support."
|
||||
echo " --with-openssl[=DIR] Build OPENSSL with custom path. Otherwise the regular system paths will"
|
||||
echo " be searched."
|
||||
echo " --with-fuse Build FUSE components for mounting a blobfs filesystem."
|
||||
@ -608,6 +609,9 @@ for i in "$@"; do
|
||||
CONFIG[URING]=n
|
||||
CONFIG[URING_PATH]=
|
||||
;;
|
||||
--with-uring-zns)
|
||||
CONFIG[URING_ZNS]=y
|
||||
;;
|
||||
--with-openssl=*)
|
||||
check_dir "$i"
|
||||
CONFIG[OPENSSL_PATH]=$(readlink -f ${i#*=})
|
||||
@ -1122,6 +1126,18 @@ if [[ "${CONFIG[URING]}" = "y" ]]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Validate --with-uring-zns: it depends on the uring module being enabled and
# on a <linux/blkzoned.h> that defines BLK_ZONE_REP_CAPACITY.
if [[ "${CONFIG[URING_ZNS]}" = "y" ]]; then
	if [[ "${CONFIG[URING]}" = "n" ]]; then
		echo "--with-uring-zns requires --with-uring."
		exit 1
	fi

	# Tiny probe program; it only compiles when the header provides the symbol.
	zns_probe_src='#include<linux/blkzoned.h>\nint main(void) { return BLK_ZONE_REP_CAPACITY; }\n'
	if ! echo -e "$zns_probe_src" | "${BUILD_CMD[@]}" -c - 2> /dev/null; then
		echo "--with-uring-zns requires blkzoned.h (from kernel >= linux-5.9)."
		exit 1
	fi
fi
|
||||
|
||||
if [[ "${CONFIG[FUSE]}" = "y" ]]; then
|
||||
if [[ ! -d /usr/include/fuse3 ]] && [[ ! -d /usr/local/include/fuse3 ]]; then
|
||||
echo "--with-fuse requires libfuse3."
|
||||
|
@ -605,6 +605,10 @@ The user needs to configure SPDK to include io_uring support:
|
||||
|
||||
`configure --with-uring`
|
||||
|
||||
To enable the uring bdev for ZNS devices, configure SPDK with both options:
|
||||
|
||||
`configure --with-uring --with-uring-zns`
|
||||
|
||||
To create a uring bdev with given filename, bdev name and block size use the `bdev_uring_create` RPC.
|
||||
|
||||
`rpc.py bdev_uring_create /path/to/device bdev_u0 512`
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "bdev_uring.h"
|
||||
|
||||
#include "spdk/stdinc.h"
|
||||
|
||||
#include "spdk/config.h"
|
||||
#include "spdk/barrier.h"
|
||||
#include "spdk/bdev.h"
|
||||
#include "spdk/env.h"
|
||||
@ -20,6 +20,17 @@
|
||||
#include "spdk/log.h"
|
||||
#include "spdk_internal/uring.h"
|
||||
|
||||
#ifdef SPDK_CONFIG_URING_ZNS
|
||||
#include <linux/blkzoned.h>
|
||||
#define SECTOR_SHIFT 9
|
||||
#endif
|
||||
|
||||
/* Zone geometry cached for a uring bdev that sits on a zoned block device. */
struct bdev_uring_zoned_dev {
	/* Number of zones, as returned by the BLKGETNRZONES ioctl. */
	uint64_t	num_zones;
	/* log2 of the zone size expressed in logical blocks. */
	uint32_t	zone_shift;
	/* Logical-block shift minus SECTOR_SHIFT; converts blocks <-> 512-byte sectors. */
	uint32_t	lba_shift;
};
|
||||
|
||||
/* I/O channel context; its only member is the group channel pointer. */
struct bdev_uring_io_channel {
	struct bdev_uring_group_channel	*group_ch;
};
|
||||
@ -39,6 +50,7 @@ struct bdev_uring_task {
|
||||
|
||||
struct bdev_uring {
|
||||
struct spdk_bdev bdev;
|
||||
struct bdev_uring_zoned_dev zd;
|
||||
char *filename;
|
||||
int fd;
|
||||
TAILQ_ENTRY(bdev_uring) link;
|
||||
@ -273,10 +285,271 @@ bdev_uring_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SPDK_CONFIG_URING_ZNS
|
||||
/*
 * Read the first line of /sys/block/<basename(devname)>/<attr> into str.
 *
 * devname  Path of the block device; only its last path component is used.
 * attr     Attribute path relative to the device's sysfs directory.
 * str      Destination buffer; the line is passed through spdk_str_chomp().
 * str_len  Size of str in bytes.
 *
 * Returns 0 on success, -ENOMEM if a temporary string cannot be allocated,
 * -ENOENT if the attribute file cannot be opened, or -EINVAL if no line
 * could be read from it.
 */
static int
bdev_uring_read_sysfs_attr(const char *devname, const char *attr, char *str, int str_len)
{
	char *devname_copy;
	char *path;
	FILE *file;
	int ret = 0;

	/* basename() may modify its argument, so operate on a private copy
	 * instead of the caller's const string. */
	devname_copy = strdup(devname);
	if (!devname_copy) {
		return -ENOMEM;
	}

	/* The basename() result may point into devname_copy; it is fully
	 * consumed by the format call before the copy is released. */
	path = spdk_sprintf_alloc("/sys/block/%s/%s", basename(devname_copy), attr);
	free(devname_copy);
	if (!path) {
		return -ENOMEM;
	}

	file = fopen(path, "r");
	if (!file) {
		free(path);
		return -ENOENT;
	}

	if (!fgets(str, str_len, file)) {
		ret = -EINVAL;
		goto close;
	}

	spdk_str_chomp(str);

close:
	free(path);
	fclose(file);
	return ret;
}
|
||||
|
||||
/*
 * Read a sysfs attribute of the given block device and parse it as a
 * base-10 integer.
 *
 * devname  Path of the block device.
 * attr     Attribute path relative to the device's sysfs directory.
 * val      Receives the parsed value; left untouched on failure.
 *
 * Returns 0 on success, the error from bdev_uring_read_sysfs_attr() if the
 * attribute cannot be read, or the negative errno reported by spdk_strtol()
 * if the text is not a valid number.
 */
static int
bdev_uring_read_sysfs_attr_long(const char *devname, const char *attr, long *val)
{
	char str[128];
	long parsed;
	int ret;

	ret = bdev_uring_read_sysfs_attr(devname, attr, str, sizeof(str));
	if (ret) {
		return ret;
	}

	/* spdk_strtol() signals a conversion failure with a negative errno value;
	 * do not hand that to the caller as if it were the attribute's value. */
	parsed = spdk_strtol(str, 10);
	if (parsed < 0) {
		return (int)parsed;
	}

	*val = parsed;

	return 0;
}
|
||||
|
||||
static int
|
||||
bdev_uring_fill_zone_state(struct spdk_bdev_zone_info *zone_info, struct blk_zone *zones_rep)
|
||||
{
|
||||
switch (zones_rep->cond) {
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
zone_info->state = SPDK_BDEV_ZONE_STATE_EMPTY;
|
||||
break;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
zone_info->state = SPDK_BDEV_ZONE_STATE_IMP_OPEN;
|
||||
break;
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
zone_info->state = SPDK_BDEV_ZONE_STATE_EXP_OPEN;
|
||||
break;
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
zone_info->state = SPDK_BDEV_ZONE_STATE_CLOSED;
|
||||
break;
|
||||
case BLK_ZONE_COND_READONLY:
|
||||
zone_info->state = SPDK_BDEV_ZONE_STATE_READ_ONLY;
|
||||
break;
|
||||
case BLK_ZONE_COND_FULL:
|
||||
zone_info->state = SPDK_BDEV_ZONE_STATE_FULL;
|
||||
break;
|
||||
case BLK_ZONE_COND_OFFLINE:
|
||||
zone_info->state = SPDK_BDEV_ZONE_STATE_OFFLINE;
|
||||
break;
|
||||
default:
|
||||
SPDK_ERRLOG("Invalid zone state: %#x in zone report\n", zones_rep->cond);
|
||||
return -EIO;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
|
||||
{
|
||||
struct bdev_uring *uring;
|
||||
struct blk_zone_range range;
|
||||
long unsigned zone_mgmt_op;
|
||||
uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
|
||||
|
||||
uring = (struct bdev_uring *)bdev_io->bdev->ctxt;
|
||||
|
||||
switch (bdev_io->u.zone_mgmt.zone_action) {
|
||||
case SPDK_BDEV_ZONE_RESET:
|
||||
zone_mgmt_op = BLKRESETZONE;
|
||||
break;
|
||||
case SPDK_BDEV_ZONE_OPEN:
|
||||
zone_mgmt_op = BLKOPENZONE;
|
||||
break;
|
||||
case SPDK_BDEV_ZONE_CLOSE:
|
||||
zone_mgmt_op = BLKCLOSEZONE;
|
||||
break;
|
||||
case SPDK_BDEV_ZONE_FINISH:
|
||||
zone_mgmt_op = BLKFINISHZONE;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
range.sector = (zone_id << uring->zd.lba_shift);
|
||||
range.nr_sectors = (uring->bdev.zone_size << uring->zd.lba_shift);
|
||||
|
||||
if (ioctl(uring->fd, zone_mgmt_op, &range)) {
|
||||
SPDK_ERRLOG("Ioctl BLKXXXZONE(%#x) failed errno: %d(%s)\n",
|
||||
bdev_io->u.zone_mgmt.zone_action, errno, strerror(errno));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
|
||||
{
|
||||
struct bdev_uring *uring;
|
||||
struct blk_zone *zones;
|
||||
struct blk_zone_report *rep;
|
||||
struct spdk_bdev_zone_info *zone_info = bdev_io->u.zone_mgmt.buf;
|
||||
size_t repsize;
|
||||
uint32_t i, shift;
|
||||
uint32_t num_zones = bdev_io->u.zone_mgmt.num_zones;
|
||||
uint64_t zone_id = bdev_io->u.zone_mgmt.zone_id;
|
||||
|
||||
uring = (struct bdev_uring *)bdev_io->bdev->ctxt;
|
||||
shift = uring->zd.lba_shift;
|
||||
|
||||
if ((num_zones > uring->zd.num_zones) || !num_zones) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
repsize = sizeof(struct blk_zone_report) + (sizeof(struct blk_zone) * num_zones);
|
||||
rep = (struct blk_zone_report *)malloc(repsize);
|
||||
if (!rep) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
zones = (struct blk_zone *)(rep + 1);
|
||||
|
||||
while (num_zones && ((zone_id >> uring->zd.zone_shift) <= num_zones)) {
|
||||
memset(rep, 0, repsize);
|
||||
rep->sector = zone_id;
|
||||
rep->nr_zones = num_zones;
|
||||
|
||||
if (ioctl(uring->fd, BLKREPORTZONE, rep)) {
|
||||
SPDK_ERRLOG("Ioctl BLKREPORTZONE failed errno: %d(%s)\n",
|
||||
errno, strerror(errno));
|
||||
free(rep);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!rep->nr_zones) {
|
||||
break;
|
||||
}
|
||||
|
||||
for (i = 0; i < rep->nr_zones; i++) {
|
||||
zone_info->zone_id = ((zones + i)->start >> shift);
|
||||
zone_info->write_pointer = ((zones + i)->wp >> shift);
|
||||
zone_info->capacity = ((zones + i)->capacity >> shift);
|
||||
|
||||
bdev_uring_fill_zone_state(zone_info, zones + i);
|
||||
|
||||
zone_id = ((zones + i)->start + (zones + i)->len) >> shift;
|
||||
zone_info++;
|
||||
num_zones--;
|
||||
}
|
||||
}
|
||||
|
||||
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
|
||||
free(rep);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
|
||||
{
|
||||
char str[128];
|
||||
long int val = 0;
|
||||
uint32_t zinfo;
|
||||
int retval = -1;
|
||||
|
||||
uring->bdev.zoned = false;
|
||||
|
||||
/* Check if this is a zoned block device */
|
||||
if (bdev_uring_read_sysfs_attr(filename, "queue/zoned", str, sizeof(str))) {
|
||||
SPDK_ERRLOG("Unable to open file %s/queue/zoned. errno: %d\n", filename, errno);
|
||||
} else if (strcmp(str, "host-aware") == 0 || strcmp(str, "host-managed") == 0) {
|
||||
/* Only host-aware & host-managed zns devices */
|
||||
uring->bdev.zoned = true;
|
||||
|
||||
if (ioctl(uring->fd, BLKGETNRZONES, &zinfo)) {
|
||||
SPDK_ERRLOG("ioctl BLKNRZONES failed %d (%s)\n", errno, strerror(errno));
|
||||
goto err_ret;
|
||||
}
|
||||
uring->zd.num_zones = zinfo;
|
||||
|
||||
if (ioctl(uring->fd, BLKGETZONESZ, &zinfo)) {
|
||||
SPDK_ERRLOG("ioctl BLKGETZONESZ failed %d (%s)\n", errno, strerror(errno));
|
||||
goto err_ret;
|
||||
}
|
||||
|
||||
uring->zd.lba_shift = uring->bdev.required_alignment - SECTOR_SHIFT;
|
||||
uring->bdev.zone_size = (zinfo >> uring->zd.lba_shift);
|
||||
uring->zd.zone_shift = spdk_u32log2(zinfo >> uring->zd.lba_shift);
|
||||
|
||||
if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_open_zones", &val)) {
|
||||
SPDK_ERRLOG("Failed to get max open zones %d (%s)\n", errno, strerror(errno));
|
||||
goto err_ret;
|
||||
}
|
||||
uring->bdev.max_open_zones = uring->bdev.optimal_open_zones = (uint32_t)val;
|
||||
|
||||
if (bdev_uring_read_sysfs_attr_long(filename, "queue/max_active_zones", &val)) {
|
||||
SPDK_ERRLOG("Failed to get max active zones %d (%s)\n", errno, strerror(errno));
|
||||
goto err_ret;
|
||||
}
|
||||
uring->bdev.max_active_zones = (uint32_t)val;
|
||||
retval = 0;
|
||||
} else {
|
||||
retval = 0; /* queue/zoned=none */
|
||||
}
|
||||
|
||||
err_ret:
|
||||
return retval;
|
||||
}
|
||||
#else
|
||||
/* No support for zoned devices */
|
||||
/* Built without SPDK_CONFIG_URING_ZNS: zone management requests always fail with -1. */
static int
bdev_uring_zone_management_op(struct spdk_bdev_io *bdev_io)
{
	(void)bdev_io;

	return -1;
}
|
||||
|
||||
/* Built without SPDK_CONFIG_URING_ZNS: zone info requests always fail with -1. */
static int
bdev_uring_zone_get_info(struct spdk_bdev_io *bdev_io)
{
	(void)bdev_io;

	return -1;
}
|
||||
|
||||
/* Built without SPDK_CONFIG_URING_ZNS: no zoned probing is done; always returns 0
 * and leaves the bdev untouched. */
static int
bdev_uring_check_zoned_support(struct bdev_uring *uring, const char *name, const char *filename)
{
	(void)uring;
	(void)name;
	(void)filename;

	return 0;
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
_bdev_uring_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
|
||||
{
|
||||
|
||||
switch (bdev_io->type) {
|
||||
case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
|
||||
return bdev_uring_zone_get_info(bdev_io);
|
||||
case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
|
||||
return bdev_uring_zone_management_op(bdev_io);
|
||||
/* Read and write operations must be performed on buffers aligned to
|
||||
* bdev->required_alignment. If user specified unaligned buffers,
|
||||
* get the aligned buffer from the pool by calling spdk_bdev_io_get_buf. */
|
||||
@ -302,6 +575,10 @@ static bool
|
||||
bdev_uring_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
|
||||
{
|
||||
switch (io_type) {
|
||||
#ifdef SPDK_CONFIG_URING_ZNS
|
||||
case SPDK_BDEV_IO_TYPE_GET_ZONE_INFO:
|
||||
case SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT:
|
||||
#endif
|
||||
case SPDK_BDEV_IO_TYPE_READ:
|
||||
case SPDK_BDEV_IO_TYPE_WRITE:
|
||||
return true;
|
||||
@ -483,6 +760,11 @@ create_uring_bdev(const char *name, const char *filename, uint32_t block_size)
|
||||
uring->bdev.blocklen = block_size;
|
||||
uring->bdev.required_alignment = spdk_u32log2(block_size);
|
||||
|
||||
rc = bdev_uring_check_zoned_support(uring, name, filename);
|
||||
if (rc) {
|
||||
goto error_return;
|
||||
}
|
||||
|
||||
if (bdev_size % uring->bdev.blocklen != 0) {
|
||||
SPDK_ERRLOG("Disk size %" PRIu64 " is not a multiple of block size %" PRIu32 "\n",
|
||||
bdev_size, uring->bdev.blocklen);
|
||||
|
Loading…
Reference in New Issue
Block a user