idxd: For kernel mode, handle IOMMU+SM mode
If the kernel is booted with the IOMMU enabled and Shared Memory mode enabled (which are the expected boot parameters for production servers), then the kernel idxd driver will automatically register a dedicated work queue with the PASID for the process that opens it. This means that the descriptors written into the portal for that work queue should be *virtual* addresses. If the IOMMU is enabled but Shared Memory mode is disabled, then the kernel has registered the device with the IOMMU and assigned it I/O virtual addresses. We have no way to get those addresses from user space, so we cannot use the kernel driver in this mode. Add a check to catch that. If the IOMMU is disabled, then physical addresses are used everywhere. Change-Id: I0bf079835ad4df1128ef9db54f5564050327e9f7 Signed-off-by: Ben Walker <benjamin.walker@intel.com> Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14019 Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Community-CI: Mellanox Build Bot Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Paul Luse <paul.e.luse@intel.com> Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
This commit is contained in:
parent
1c098401d8
commit
2371a070c8
@ -45,10 +45,16 @@ _submit_to_hw(struct spdk_idxd_io_channel *chan, struct idxd_ops *op)
|
||||
}
|
||||
|
||||
inline static int
|
||||
_vtophys(const void *buf, uint64_t *buf_addr, uint64_t size)
|
||||
_vtophys(struct spdk_idxd_io_channel *chan, const void *buf, uint64_t *buf_addr, uint64_t size)
|
||||
{
|
||||
uint64_t updated_size = size;
|
||||
|
||||
if (chan->pasid_enabled) {
|
||||
/* We can just use virtual addresses */
|
||||
*buf_addr = (uint64_t)buf;
|
||||
return 0;
|
||||
}
|
||||
|
||||
*buf_addr = spdk_vtophys(buf, &updated_size);
|
||||
|
||||
if (*buf_addr == SPDK_VTOPHYS_ERROR) {
|
||||
@ -70,16 +76,20 @@ struct idxd_vtophys_iter {
|
||||
uint64_t len;
|
||||
|
||||
uint64_t offset;
|
||||
|
||||
bool pasid_enabled;
|
||||
};
|
||||
|
||||
static void
|
||||
idxd_vtophys_iter_init(struct idxd_vtophys_iter *iter,
|
||||
idxd_vtophys_iter_init(struct spdk_idxd_io_channel *chan,
|
||||
struct idxd_vtophys_iter *iter,
|
||||
const void *src, void *dst, uint64_t len)
|
||||
{
|
||||
iter->src = src;
|
||||
iter->dst = dst;
|
||||
iter->len = len;
|
||||
iter->offset = 0;
|
||||
iter->pasid_enabled = chan->pasid_enabled;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
@ -97,6 +107,12 @@ idxd_vtophys_iter_next(struct idxd_vtophys_iter *iter,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (iter->pasid_enabled) {
|
||||
*src_phys = (uint64_t)src;
|
||||
*dst_phys = (uint64_t)dst;
|
||||
return iter->len;
|
||||
}
|
||||
|
||||
len = iter->len - iter->offset;
|
||||
|
||||
src_off = len;
|
||||
@ -145,7 +161,7 @@ _dsa_alloc_batches(struct spdk_idxd_io_channel *chan, int num_descriptors)
|
||||
goto error_user;
|
||||
}
|
||||
|
||||
rc = _vtophys(batch->user_desc, &batch->user_desc_addr,
|
||||
rc = _vtophys(chan, batch->user_desc, &batch->user_desc_addr,
|
||||
DESC_PER_BATCH * sizeof(struct idxd_hw_desc));
|
||||
if (rc) {
|
||||
SPDK_ERRLOG("Failed to translate batch descriptor memory\n");
|
||||
@ -161,7 +177,7 @@ _dsa_alloc_batches(struct spdk_idxd_io_channel *chan, int num_descriptors)
|
||||
}
|
||||
|
||||
for (j = 0; j < DESC_PER_BATCH; j++) {
|
||||
rc = _vtophys(&op->hw, &desc->completion_addr, sizeof(struct dsa_hw_comp_record));
|
||||
rc = _vtophys(chan, &op->hw, &desc->completion_addr, sizeof(struct dsa_hw_comp_record));
|
||||
if (rc) {
|
||||
SPDK_ERRLOG("Failed to translate batch entry completion memory\n");
|
||||
goto error_user;
|
||||
@ -208,6 +224,7 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
|
||||
}
|
||||
|
||||
chan->idxd = idxd;
|
||||
chan->pasid_enabled = idxd->pasid_enabled;
|
||||
STAILQ_INIT(&chan->ops_pool);
|
||||
TAILQ_INIT(&chan->batch_pool);
|
||||
STAILQ_INIT(&chan->ops_outstanding);
|
||||
@ -258,7 +275,7 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
|
||||
for (i = 0; i < num_descriptors; i++) {
|
||||
STAILQ_INSERT_TAIL(&chan->ops_pool, op, link);
|
||||
op->desc = desc;
|
||||
rc = _vtophys(&op->hw, &desc->completion_addr, comp_rec_size);
|
||||
rc = _vtophys(chan, &op->hw, &desc->completion_addr, comp_rec_size);
|
||||
if (rc) {
|
||||
SPDK_ERRLOG("Failed to translate completion memory\n");
|
||||
goto error;
|
||||
@ -650,7 +667,7 @@ spdk_idxd_submit_copy(struct spdk_idxd_io_channel *chan,
|
||||
len > 0;
|
||||
len = spdk_ioviter_next(&iter, &src, &dst)) {
|
||||
|
||||
idxd_vtophys_iter_init(&vtophys_iter, src, dst, len);
|
||||
idxd_vtophys_iter_init(chan, &vtophys_iter, src, dst, len);
|
||||
|
||||
while (len > 0) {
|
||||
if (first_op == NULL) {
|
||||
@ -726,7 +743,7 @@ spdk_idxd_submit_dualcast(struct spdk_idxd_io_channel *chan, void *dst1, void *d
|
||||
return rc;
|
||||
}
|
||||
|
||||
idxd_vtophys_iter_init(&iter_outer, src, dst1, nbytes);
|
||||
idxd_vtophys_iter_init(chan, &iter_outer, src, dst1, nbytes);
|
||||
|
||||
first_op = NULL;
|
||||
count = 0;
|
||||
@ -738,7 +755,7 @@ spdk_idxd_submit_dualcast(struct spdk_idxd_io_channel *chan, void *dst1, void *d
|
||||
goto error;
|
||||
}
|
||||
|
||||
idxd_vtophys_iter_init(&iter_inner, src, dst2, nbytes);
|
||||
idxd_vtophys_iter_init(chan, &iter_inner, src, dst2, nbytes);
|
||||
|
||||
src += outer_seg_len;
|
||||
nbytes -= outer_seg_len;
|
||||
@ -824,7 +841,7 @@ spdk_idxd_submit_compare(struct spdk_idxd_io_channel *chan,
|
||||
len > 0;
|
||||
len = spdk_ioviter_next(&iter, &src1, &src2)) {
|
||||
|
||||
idxd_vtophys_iter_init(&vtophys_iter, src1, src2, len);
|
||||
idxd_vtophys_iter_init(chan, &vtophys_iter, src1, src2, len);
|
||||
|
||||
while (len > 0) {
|
||||
if (first_op == NULL) {
|
||||
@ -919,11 +936,15 @@ spdk_idxd_submit_fill(struct spdk_idxd_io_channel *chan,
|
||||
count++;
|
||||
|
||||
seg_len = len;
|
||||
dst_addr = spdk_vtophys(dst, &seg_len);
|
||||
if (dst_addr == SPDK_VTOPHYS_ERROR) {
|
||||
SPDK_ERRLOG("Error translating address\n");
|
||||
rc = -EFAULT;
|
||||
goto error;
|
||||
if (chan->pasid_enabled) {
|
||||
dst_addr = (uint64_t)dst;
|
||||
} else {
|
||||
dst_addr = spdk_vtophys(dst, &seg_len);
|
||||
if (dst_addr == SPDK_VTOPHYS_ERROR) {
|
||||
SPDK_ERRLOG("Error translating address\n");
|
||||
rc = -EFAULT;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
seg_len = spdk_min(seg_len, len);
|
||||
@ -997,11 +1018,15 @@ spdk_idxd_submit_crc32c(struct spdk_idxd_io_channel *chan,
|
||||
count++;
|
||||
|
||||
seg_len = len;
|
||||
src_addr = spdk_vtophys(src, &seg_len);
|
||||
if (src_addr == SPDK_VTOPHYS_ERROR) {
|
||||
SPDK_ERRLOG("Error translating address\n");
|
||||
rc = -EFAULT;
|
||||
goto error;
|
||||
if (chan->pasid_enabled) {
|
||||
src_addr = (uint64_t)src;
|
||||
} else {
|
||||
src_addr = spdk_vtophys(src, &seg_len);
|
||||
if (src_addr == SPDK_VTOPHYS_ERROR) {
|
||||
SPDK_ERRLOG("Error translating address\n");
|
||||
rc = -EFAULT;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
seg_len = spdk_min(seg_len, len);
|
||||
@ -1069,7 +1094,7 @@ spdk_idxd_submit_copy_crc32c(struct spdk_idxd_io_channel *chan,
|
||||
len = spdk_ioviter_next(&iter, &src, &dst)) {
|
||||
|
||||
|
||||
idxd_vtophys_iter_init(&vtophys_iter, src, dst, len);
|
||||
idxd_vtophys_iter_init(chan, &vtophys_iter, src, dst, len);
|
||||
|
||||
while (len > 0) {
|
||||
if (first_op == NULL) {
|
||||
@ -1145,12 +1170,12 @@ _idxd_submit_compress_single(struct spdk_idxd_io_channel *chan, void *dst, const
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = _vtophys(src, &src_addr, nbytes_src);
|
||||
rc = _vtophys(chan, src, &src_addr, nbytes_src);
|
||||
if (rc) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
rc = _vtophys(dst, &dst_addr, nbytes_dst);
|
||||
rc = _vtophys(chan, dst, &dst_addr, nbytes_dst);
|
||||
if (rc) {
|
||||
goto error;
|
||||
}
|
||||
@ -1213,12 +1238,12 @@ _idxd_submit_decompress_single(struct spdk_idxd_io_channel *chan, void *dst, con
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = _vtophys(src, &src_addr, nbytes);
|
||||
rc = _vtophys(chan, src, &src_addr, nbytes);
|
||||
if (rc) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
rc = _vtophys(dst, &dst_addr, nbytes_dst);
|
||||
rc = _vtophys(chan, dst, &dst_addr, nbytes_dst);
|
||||
if (rc) {
|
||||
goto error;
|
||||
}
|
||||
|
@ -74,6 +74,8 @@ struct spdk_idxd_io_channel {
|
||||
void *portal;
|
||||
uint32_t portal_offset;
|
||||
|
||||
bool pasid_enabled;
|
||||
|
||||
/* The currently open batch */
|
||||
struct idxd_batch *batch;
|
||||
|
||||
@ -138,6 +140,7 @@ struct spdk_idxd_device {
|
||||
uint32_t total_wq_size;
|
||||
uint32_t chan_per_device;
|
||||
pthread_mutex_t num_channels_lock;
|
||||
bool pasid_enabled;
|
||||
enum idxd_dev type;
|
||||
struct iaa_aecs *aecs;
|
||||
uint32_t version;
|
||||
|
@ -71,6 +71,7 @@ kernel_idxd_probe(void *cb_ctx, spdk_idxd_attach_cb attach_cb, spdk_idxd_probe_c
|
||||
enum accfg_device_state dstate;
|
||||
struct spdk_kernel_idxd_device *kernel_idxd;
|
||||
struct accfg_wq *wq;
|
||||
bool pasid_enabled;
|
||||
|
||||
/* Make sure that the device is enabled */
|
||||
dstate = accfg_device_get_state(device);
|
||||
@ -78,6 +79,17 @@ kernel_idxd_probe(void *cb_ctx, spdk_idxd_attach_cb attach_cb, spdk_idxd_probe_c
|
||||
continue;
|
||||
}
|
||||
|
||||
pasid_enabled = accfg_device_get_pasid_enabled(device);
|
||||
if (!pasid_enabled && spdk_iommu_is_enabled()) {
|
||||
/*
|
||||
* If the IOMMU is enabled but shared memory mode is not on,
|
||||
* then we have no way to get the IOVA from userspace to use this
|
||||
* device or any kernel device. Return an error.
|
||||
*/
|
||||
SPDK_ERRLOG("Found kernel IDXD device, but cannot use it when IOMMU is enabled but SM is disabled\n");
|
||||
return -ENOTSUP;
|
||||
}
|
||||
|
||||
kernel_idxd = calloc(1, sizeof(struct spdk_kernel_idxd_device));
|
||||
if (kernel_idxd == NULL) {
|
||||
SPDK_ERRLOG("Failed to allocate memory for kernel_idxd device.\n");
|
||||
@ -91,6 +103,7 @@ kernel_idxd_probe(void *cb_ctx, spdk_idxd_attach_cb attach_cb, spdk_idxd_probe_c
|
||||
kernel_idxd->idxd.impl = &g_kernel_idxd_impl;
|
||||
kernel_idxd->fd = -1;
|
||||
kernel_idxd->idxd.version = accfg_device_get_version(device);
|
||||
kernel_idxd->idxd.pasid_enabled = pasid_enabled;
|
||||
|
||||
accfg_wq_foreach(device, wq) {
|
||||
enum accfg_wq_state wstate;
|
||||
@ -146,8 +159,6 @@ kernel_idxd_probe(void *cb_ctx, spdk_idxd_attach_cb attach_cb, spdk_idxd_probe_c
|
||||
/* Since we only use a single WQ, the total size is the size of this WQ */
|
||||
kernel_idxd->idxd.total_wq_size = accfg_wq_get_size(wq);
|
||||
kernel_idxd->idxd.chan_per_device = (kernel_idxd->idxd.total_wq_size >= 128) ? 8 : 4;
|
||||
/* TODO: Handle BOF when we add support for shared WQ */
|
||||
/* wq_ctx->bof = accfg_wq_get_block_on_fault(wq); */
|
||||
|
||||
/* We only use a single WQ, so once we've found one we can stop looking. */
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user