idxd: For kernel mode, handle IOMMU+SM mode

If the kernel is booted with the IOMMU enabled and Shared Memory mode
enabled (the expected boot configuration for production servers), then
the kernel idxd driver automatically registers a dedicated work queue
with the PASID of the process that opens it. This means that the
addresses in descriptors written to that work queue's portal must be
*virtual* addresses.
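
In code terms this collapses to a single branch at address-translation
time. The sketch below is a hedged simplification of the _vtophys()
helper in the diff further down (the function name and the combined
error check are illustrative; the real helper reports errors separately):

#include <errno.h>
#include <stdint.h>
#include "spdk/env.h"

/* Simplified from the patch below: with a PASID registered (IOMMU + SM),
 * the device walks the process page tables, so the virtual address goes
 * into the descriptor as-is. Otherwise fall back to spdk_vtophys(). */
static int
vtophys_sketch(struct spdk_idxd_io_channel *chan, const void *buf,
	       uint64_t *buf_addr, uint64_t size)
{
	uint64_t updated_size = size;

	if (chan->pasid_enabled) {
		*buf_addr = (uint64_t)buf;	/* virtual address, used directly */
		return 0;
	}

	*buf_addr = spdk_vtophys(buf, &updated_size);
	if (*buf_addr == SPDK_VTOPHYS_ERROR || updated_size < size) {
		return -EFAULT;	/* untranslatable, or not physically contiguous */
	}

	return 0;
}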

If the IOMMU is enabled but Shared Memory mode is disabled, then the
kernel has registered the device with the IOMMU and assigned it I/O
virtual addresses. We have no way to obtain those addresses from user
space, so we cannot use the kernel driver in this mode. Add a check to
catch that configuration and fail the probe.
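
Condensed, the probe-time guard added in the last file of this diff
looks like the following (the wrapper function name is illustrative;
accfg_device_get_pasid_enabled() from libaccel-config and
spdk_iommu_is_enabled() are the calls the patch actually uses):

#include <stdbool.h>
#include <errno.h>
#include <accel-config/libaccel_config.h>
#include "spdk/env.h"
#include "spdk/log.h"

/* Illustrative wrapper: refuse a kernel-mode device when the IOMMU is on
 * but the device has no PASID (SM disabled), since user space cannot
 * learn the kernel's IOVA mappings for it. */
static int
check_kernel_idxd_usable(struct accfg_device *device)
{
	bool pasid_enabled = accfg_device_get_pasid_enabled(device);

	if (!pasid_enabled && spdk_iommu_is_enabled()) {
		SPDK_ERRLOG("Kernel IDXD device found, but IOMMU is enabled without SM\n");
		return -ENOTSUP;
	}

	return 0;
}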

If the IOMMU is disabled, then physical addresses are used everywhere.
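
Putting the three boot configurations side by side, the decision the
driver makes can be summarized as follows (a sketch; the enum and
function are illustrative names, not part of the patch):

#include <stdbool.h>

enum idxd_addressing {
	IDXD_ADDR_VIRTUAL,	/* IOMMU on + SM on: descriptors carry process virtual addresses */
	IDXD_ADDR_UNSUPPORTED,	/* IOMMU on + SM off: kernel-owned IOVAs, unusable from user space */
	IDXD_ADDR_PHYSICAL	/* IOMMU off: translate with spdk_vtophys() */
};

/* Illustrative only: collapse the three boot configurations into one decision. */
static enum idxd_addressing
idxd_addressing_mode(bool iommu_enabled, bool pasid_enabled)
{
	if (iommu_enabled && pasid_enabled) {
		return IDXD_ADDR_VIRTUAL;
	} else if (iommu_enabled) {
		return IDXD_ADDR_UNSUPPORTED;
	} else {
		return IDXD_ADDR_PHYSICAL;
	}
}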

Change-Id: I0bf079835ad4df1128ef9db54f5564050327e9f7
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14019
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Paul Luse <paul.e.luse@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Author: Ben Walker, 2022-08-08 11:16:15 -07:00 (committed by Tomasz Zawadzki)
parent 1c098401d8 / commit 2371a070c8
3 changed files with 65 additions and 26 deletions

diff --git a/lib/idxd/idxd.c b/lib/idxd/idxd.c

@@ -45,10 +45,16 @@ _submit_to_hw(struct spdk_idxd_io_channel *chan, struct idxd_ops *op)
 }
 
 inline static int
-_vtophys(const void *buf, uint64_t *buf_addr, uint64_t size)
+_vtophys(struct spdk_idxd_io_channel *chan, const void *buf, uint64_t *buf_addr, uint64_t size)
 {
 	uint64_t updated_size = size;
 
+	if (chan->pasid_enabled) {
+		/* We can just use virtual addresses */
+		*buf_addr = (uint64_t)buf;
+		return 0;
+	}
+
 	*buf_addr = spdk_vtophys(buf, &updated_size);
 	if (*buf_addr == SPDK_VTOPHYS_ERROR) {
@@ -70,16 +76,20 @@ struct idxd_vtophys_iter {
 	uint64_t len;
 	uint64_t offset;
+	bool pasid_enabled;
 };
 
 static void
-idxd_vtophys_iter_init(struct idxd_vtophys_iter *iter,
+idxd_vtophys_iter_init(struct spdk_idxd_io_channel *chan,
+		       struct idxd_vtophys_iter *iter,
 		       const void *src, void *dst, uint64_t len)
 {
 	iter->src = src;
 	iter->dst = dst;
 	iter->len = len;
 	iter->offset = 0;
+	iter->pasid_enabled = chan->pasid_enabled;
 }
 
 static uint64_t
@@ -97,6 +107,12 @@ idxd_vtophys_iter_next(struct idxd_vtophys_iter *iter,
 		return 0;
 	}
 
+	if (iter->pasid_enabled) {
+		*src_phys = (uint64_t)src;
+		*dst_phys = (uint64_t)dst;
+		return iter->len;
+	}
+
 	len = iter->len - iter->offset;
 
 	src_off = len;
@@ -145,7 +161,7 @@ _dsa_alloc_batches(struct spdk_idxd_io_channel *chan, int num_descriptors)
 		goto error_user;
 	}
 
-	rc = _vtophys(batch->user_desc, &batch->user_desc_addr,
+	rc = _vtophys(chan, batch->user_desc, &batch->user_desc_addr,
 		      DESC_PER_BATCH * sizeof(struct idxd_hw_desc));
 	if (rc) {
 		SPDK_ERRLOG("Failed to translate batch descriptor memory\n");
@@ -161,7 +177,7 @@ _dsa_alloc_batches(struct spdk_idxd_io_channel *chan, int num_descriptors)
 	}
 
 	for (j = 0; j < DESC_PER_BATCH; j++) {
-		rc = _vtophys(&op->hw, &desc->completion_addr, sizeof(struct dsa_hw_comp_record));
+		rc = _vtophys(chan, &op->hw, &desc->completion_addr, sizeof(struct dsa_hw_comp_record));
 		if (rc) {
 			SPDK_ERRLOG("Failed to translate batch entry completion memory\n");
 			goto error_user;
@@ -208,6 +224,7 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
 	}
 
 	chan->idxd = idxd;
+	chan->pasid_enabled = idxd->pasid_enabled;
 	STAILQ_INIT(&chan->ops_pool);
 	TAILQ_INIT(&chan->batch_pool);
 	STAILQ_INIT(&chan->ops_outstanding);
@@ -258,7 +275,7 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
 	for (i = 0; i < num_descriptors; i++) {
 		STAILQ_INSERT_TAIL(&chan->ops_pool, op, link);
 		op->desc = desc;
-		rc = _vtophys(&op->hw, &desc->completion_addr, comp_rec_size);
+		rc = _vtophys(chan, &op->hw, &desc->completion_addr, comp_rec_size);
 		if (rc) {
 			SPDK_ERRLOG("Failed to translate completion memory\n");
 			goto error;
@@ -650,7 +667,7 @@ spdk_idxd_submit_copy(struct spdk_idxd_io_channel *chan,
 	     len > 0;
 	     len = spdk_ioviter_next(&iter, &src, &dst)) {
 
-		idxd_vtophys_iter_init(&vtophys_iter, src, dst, len);
+		idxd_vtophys_iter_init(chan, &vtophys_iter, src, dst, len);
 
 		while (len > 0) {
 			if (first_op == NULL) {
@@ -726,7 +743,7 @@ spdk_idxd_submit_dualcast(struct spdk_idxd_io_channel *chan, void *dst1, void *d
 		return rc;
 	}
 
-	idxd_vtophys_iter_init(&iter_outer, src, dst1, nbytes);
+	idxd_vtophys_iter_init(chan, &iter_outer, src, dst1, nbytes);
 
 	first_op = NULL;
 	count = 0;
@@ -738,7 +755,7 @@ spdk_idxd_submit_dualcast(struct spdk_idxd_io_channel *chan, void *dst1, void *d
 			goto error;
 		}
 
-		idxd_vtophys_iter_init(&iter_inner, src, dst2, nbytes);
+		idxd_vtophys_iter_init(chan, &iter_inner, src, dst2, nbytes);
 
 		src += outer_seg_len;
 		nbytes -= outer_seg_len;
@@ -824,7 +841,7 @@ spdk_idxd_submit_compare(struct spdk_idxd_io_channel *chan,
 	     len > 0;
 	     len = spdk_ioviter_next(&iter, &src1, &src2)) {
 
-		idxd_vtophys_iter_init(&vtophys_iter, src1, src2, len);
+		idxd_vtophys_iter_init(chan, &vtophys_iter, src1, src2, len);
 
 		while (len > 0) {
 			if (first_op == NULL) {
@@ -919,11 +936,15 @@ spdk_idxd_submit_fill(struct spdk_idxd_io_channel *chan,
 		count++;
 
 		seg_len = len;
-		dst_addr = spdk_vtophys(dst, &seg_len);
-		if (dst_addr == SPDK_VTOPHYS_ERROR) {
-			SPDK_ERRLOG("Error translating address\n");
-			rc = -EFAULT;
-			goto error;
+		if (chan->pasid_enabled) {
+			dst_addr = (uint64_t)dst;
+		} else {
+			dst_addr = spdk_vtophys(dst, &seg_len);
+			if (dst_addr == SPDK_VTOPHYS_ERROR) {
+				SPDK_ERRLOG("Error translating address\n");
+				rc = -EFAULT;
+				goto error;
+			}
 		}
 
 		seg_len = spdk_min(seg_len, len);
@@ -997,11 +1018,15 @@ spdk_idxd_submit_crc32c(struct spdk_idxd_io_channel *chan,
 		count++;
 
 		seg_len = len;
-		src_addr = spdk_vtophys(src, &seg_len);
-		if (src_addr == SPDK_VTOPHYS_ERROR) {
-			SPDK_ERRLOG("Error translating address\n");
-			rc = -EFAULT;
-			goto error;
+		if (chan->pasid_enabled) {
+			src_addr = (uint64_t)src;
+		} else {
+			src_addr = spdk_vtophys(src, &seg_len);
+			if (src_addr == SPDK_VTOPHYS_ERROR) {
+				SPDK_ERRLOG("Error translating address\n");
+				rc = -EFAULT;
+				goto error;
+			}
 		}
 
 		seg_len = spdk_min(seg_len, len);
@@ -1069,7 +1094,7 @@ spdk_idxd_submit_copy_crc32c(struct spdk_idxd_io_channel *chan,
 	     len = spdk_ioviter_next(&iter, &src, &dst)) {
 
-		idxd_vtophys_iter_init(&vtophys_iter, src, dst, len);
+		idxd_vtophys_iter_init(chan, &vtophys_iter, src, dst, len);
 
 		while (len > 0) {
 			if (first_op == NULL) {
@@ -1145,12 +1170,12 @@ _idxd_submit_compress_single(struct spdk_idxd_io_channel *chan, void *dst, const
 		return rc;
 	}
 
-	rc = _vtophys(src, &src_addr, nbytes_src);
+	rc = _vtophys(chan, src, &src_addr, nbytes_src);
 	if (rc) {
 		goto error;
 	}
 
-	rc = _vtophys(dst, &dst_addr, nbytes_dst);
+	rc = _vtophys(chan, dst, &dst_addr, nbytes_dst);
 	if (rc) {
 		goto error;
 	}
@@ -1213,12 +1238,12 @@ _idxd_submit_decompress_single(struct spdk_idxd_io_channel *chan, void *dst, con
 		return rc;
 	}
 
-	rc = _vtophys(src, &src_addr, nbytes);
+	rc = _vtophys(chan, src, &src_addr, nbytes);
 	if (rc) {
 		goto error;
 	}
 
-	rc = _vtophys(dst, &dst_addr, nbytes_dst);
+	rc = _vtophys(chan, dst, &dst_addr, nbytes_dst);
 	if (rc) {
 		goto error;
 	}

diff --git a/lib/idxd/idxd_internal.h b/lib/idxd/idxd_internal.h

@@ -74,6 +74,8 @@ struct spdk_idxd_io_channel {
 	void *portal;
 	uint32_t portal_offset;
 
+	bool pasid_enabled;
+
 	/* The currently open batch */
 	struct idxd_batch *batch;
@@ -138,6 +140,7 @@ struct spdk_idxd_device {
 	uint32_t total_wq_size;
 	uint32_t chan_per_device;
 	pthread_mutex_t num_channels_lock;
+	bool pasid_enabled;
 	enum idxd_dev type;
 	struct iaa_aecs *aecs;
 	uint32_t version;

diff --git a/lib/idxd/idxd_kernel.c b/lib/idxd/idxd_kernel.c

@@ -71,6 +71,7 @@ kernel_idxd_probe(void *cb_ctx, spdk_idxd_attach_cb attach_cb, spdk_idxd_probe_c
 		enum accfg_device_state dstate;
 		struct spdk_kernel_idxd_device *kernel_idxd;
 		struct accfg_wq *wq;
+		bool pasid_enabled;
 
 		/* Make sure that the device is enabled */
 		dstate = accfg_device_get_state(device);
@@ -78,6 +79,17 @@ kernel_idxd_probe(void *cb_ctx, spdk_idxd_attach_cb attach_cb, spdk_idxd_probe_c
 			continue;
 		}
 
+		pasid_enabled = accfg_device_get_pasid_enabled(device);
+		if (!pasid_enabled && spdk_iommu_is_enabled()) {
+			/*
+			 * If the IOMMU is enabled but shared memory mode is not on,
+			 * then we have no way to get the IOVA from userspace to use this
+			 * device or any kernel device. Return an error.
+			 */
+			SPDK_ERRLOG("Found kernel IDXD device, but cannot use it when IOMMU is enabled but SM is disabled\n");
+			return -ENOTSUP;
+		}
+
 		kernel_idxd = calloc(1, sizeof(struct spdk_kernel_idxd_device));
 		if (kernel_idxd == NULL) {
 			SPDK_ERRLOG("Failed to allocate memory for kernel_idxd device.\n");
@@ -91,6 +103,7 @@ kernel_idxd_probe(void *cb_ctx, spdk_idxd_attach_cb attach_cb, spdk_idxd_probe_c
 		kernel_idxd->idxd.impl = &g_kernel_idxd_impl;
 		kernel_idxd->fd = -1;
 		kernel_idxd->idxd.version = accfg_device_get_version(device);
+		kernel_idxd->idxd.pasid_enabled = pasid_enabled;
 
 		accfg_wq_foreach(device, wq) {
 			enum accfg_wq_state wstate;
@@ -146,8 +159,6 @@ kernel_idxd_probe(void *cb_ctx, spdk_idxd_attach_cb attach_cb, spdk_idxd_probe_c
 			/* Since we only use a single WQ, the total size is the size of this WQ */
 			kernel_idxd->idxd.total_wq_size = accfg_wq_get_size(wq);
 			kernel_idxd->idxd.chan_per_device = (kernel_idxd->idxd.total_wq_size >= 128) ? 8 : 4;
-			/* TODO: Handle BOF when we add support for shared WQ */
-			/* wq_ctx->bof = accfg_wq_get_block_on_fault(wq); */
 
 			/* We only use a single WQ, so once we've found one we can stop looking. */
 			break;