lib/idxd: Add compress/decompress support to low level lib

Accel module coming in next patch...

Add support for compress and decompress. The low level IDXD
library supports both DSA and IAA hardware, with separate
modules for each.

accel_perf patch follows.

Signed-off-by: paul luse <paul.e.luse@intel.com>
Change-Id: I55014122f6555f80985c11d49a54eddc5d51c337
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12292
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
paul luse 2022-04-18 12:34:57 -07:00 committed by Jim Harris
parent 3ac967baa6
commit 0ff560ea3b
8 changed files with 326 additions and 22 deletions

View File

@@ -45,6 +45,9 @@ The RPC `idxd_scan_accel_engine` has been renamed to `dsa_scan_accel_engine`
Many HW related structs/functions with the name `idxd` have been renamed to `dsa`
to more accurately represent the HW they are associated with.
Two new functions were added to the library: `spdk_idxd_submit_compress` and
`spdk_idxd_submit_decompress`.
### accel_fw
A new parameter `flags` was added to the accel API.
@@ -54,6 +57,8 @@ The APIs include:
`spdk_accel_submit_fill`
`spdk_accel_submit_copy_crc32c`
`spdk_accel_submit_copy_crc32cv`
`spdk_accel_submit_compress`
`spdk_accel_submit_decompress`
A new flag `ACCEL_FLAG_PERSISTENT` was added to indicate the target memory is PMEM.

View File

@@ -286,6 +286,53 @@ int spdk_idxd_submit_copy_crc32c(struct spdk_idxd_io_channel *chan,
uint32_t seed, uint32_t *crc_dst, int flags,
spdk_idxd_req_cb cb_fn, void *cb_arg);
/**
* Build and submit an IAA memory compress request.
*
* This function will build the compress descriptor and then immediately submit
* by writing to the proper device portal.
*
* \param chan IDXD channel to submit request.
* \param diov Destination iovec. diov with diovcnt must be large enough to hold compressed data.
* \param diovcnt Number of elements in diov for the compress buffer.
* \param siov Source iovec.
* \param siovcnt Number of elements in siov.
* \param output_size Output parameter, updated with the size of the compressed data.
* \param flags Optional flags that can vary per operation.
* \param cb_fn Callback function which will be called when the request is complete.
* \param cb_arg Opaque value which will be passed back as the arg parameter in
* the completion callback.
*
* \return 0 on success, negative errno on failure.
*/
int spdk_idxd_submit_compress(struct spdk_idxd_io_channel *chan,
struct iovec *diov, uint32_t diovcnt,
struct iovec *siov, uint32_t siovcnt, uint32_t *output_size,
int flags, spdk_idxd_req_cb cb_fn, void *cb_arg);
/**
* Build and submit an IAA memory decompress request.
*
* This function will build the decompress descriptor and then immediately submit
* by writing to the proper device portal.
*
* \param chan IDXD channel to submit request.
* \param diov Destination iovec. diov with diovcnt must be large enough to hold decompressed data.
* \param diovcnt Number of elements in diov for the decompress buffer.
* \param siov Source iovec.
* \param siovcnt Number of elements in siov.
* \param flags Optional flags that can vary per operation.
* \param cb_fn Callback function which will be called when the request is complete.
* \param cb_arg Opaque value which will be passed back as the arg parameter in
* the completion callback.
*
* \return 0 on success, negative errno on failure.
*/
int spdk_idxd_submit_decompress(struct spdk_idxd_io_channel *chan,
struct iovec *diov, uint32_t diovcnt,
struct iovec *siov, uint32_t siovcnt,
int flags, spdk_idxd_req_cb cb_fn, void *cb_arg);
/**
* Check for completed requests on an IDXD channel.
*

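For orientation, here is a minimal caller sketch. It is hypothetical (the helper names and the busy-poll loop are illustrative, not part of this patch) and assumes `chan` came from `spdk_idxd_get_channel()` on an IAA device and that both buffers are DMA-safe allocations (e.g. from `spdk_dma_zmalloc()`):

#include "spdk/stdinc.h"
#include "spdk/env.h"
#include "spdk/idxd.h"

/* Completion callback: spdk_idxd_process_events() calls this with the
 * caller's cb_arg and the operation status. */
static void
op_done(void *cb_arg, int status)
{
	*(int *)cb_arg = status;
}

/* Compress src into dst and busy-poll for completion. dst_len must be at
 * least src_len; the submit path rejects smaller destinations with -EINVAL
 * to leave room for incompressible data. */
static int
compress_one(struct spdk_idxd_io_channel *chan, void *src, uint32_t src_len,
	     void *dst, uint32_t dst_len, uint32_t *compressed_len)
{
	struct iovec siov = { .iov_base = src, .iov_len = src_len };
	struct iovec diov = { .iov_base = dst, .iov_len = dst_len };
	int result = INT_MIN;
	int rc;

	rc = spdk_idxd_submit_compress(chan, &diov, 1, &siov, 1,
				       compressed_len, 0, op_done, &result);
	if (rc) {
		return rc;
	}
	while (result == INT_MIN) {
		spdk_idxd_process_events(chan);
	}
	return result;
}

A real application would fold the `spdk_idxd_process_events()` call into its poller loop rather than spinning like this.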
View File

@@ -65,6 +65,11 @@ extern "C" {
#define IDXD_FLAG_DEST_STEERING_TAG (1 << 15)
#define IDXD_FLAG_CRC_READ_CRC_SEED (1 << 16)
#define IAA_FLAG_RD_SRC2_AECS (1 << 16)
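/* Compress descriptor flags: flush buffered output and append an
 * end-of-block symbol to terminate the DEFLATE block. */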
#define IAA_COMP_FLUSH_OUTPUT (1 << 1)
#define IAA_COMP_APPEND_EOB (1 << 2)
#define IAA_COMP_FLAGS (IAA_COMP_FLUSH_OUTPUT | IAA_COMP_APPEND_EOB)
/*
* IDXD is a family of devices, DSA and IAA.
*/
@@ -343,6 +348,20 @@ struct iaa_hw_comp_record {
};
SPDK_STATIC_ASSERT(sizeof(struct iaa_hw_comp_record) == 64, "size mismatch");
struct iaa_aecs {
uint32_t crc;
uint32_t xor_checksum;
uint32_t rsvd[5];
uint32_t num_output_accum_bits;
uint8_t output_accum[256];
uint32_t ll_sym[286];
uint32_t rsvd1;
uint32_t rsvd3;
uint32_t d_sym[30];
uint32_t pad[2];
};
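/* 4 + 4 + 20 + 4 + 256 + (286 * 4) + 4 + 4 + (30 * 4) + 8 = 1568 bytes. */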
SPDK_STATIC_ASSERT(sizeof(struct iaa_aecs) == 1568, "size mismatch");
union idxd_gencap_register {
struct {
uint64_t block_on_fault: 1;

View File

@@ -63,6 +63,8 @@ enum dsa_opcode {
IDXD_OPCODE_DIF_STRP = 20,
IDXD_OPCODE_DIF_UPDT = 21,
IDXD_OPCODE_CFLUSH = 32,
IDXD_OPCODE_DECOMPRESS = 66,
IDXD_OPCODE_COMPRESS = 67,
};
#ifdef __cplusplus

View File

@@ -218,7 +218,8 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
struct spdk_idxd_io_channel *chan;
struct idxd_hw_desc *desc;
struct idxd_ops *op;
-int i, num_descriptors, rc;
+int i, num_descriptors, rc = -1;
+uint32_t comp_rec_size;
assert(idxd != NULL);
@@ -238,7 +239,8 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
if (idxd->num_channels == idxd->chan_per_device) {
/* too many channels sharing this device */
pthread_mutex_unlock(&idxd->num_channels_lock);
-goto err_chan;
+SPDK_ERRLOG("Too many channels sharing this device\n");
+goto error;
}
/* Have each channel start at a different offset. */
@@ -254,39 +256,46 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
0x40, NULL,
SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
if (chan->desc_base == NULL) {
SPDK_ERRLOG("Failed to allocate descriptor memory\n");
goto err_chan;
SPDK_ERRLOG("Failed to allocate DSA descriptor memory\n");
goto error;
}
chan->ops_base = op = spdk_zmalloc(num_descriptors * sizeof(struct idxd_ops),
0x40, NULL,
SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
if (chan->ops_base == NULL) {
SPDK_ERRLOG("Failed to allocate completion memory\n");
goto err_op;
SPDK_ERRLOG("Failed to allocate idxd_ops memory\n");
goto error;
}
+if (idxd->type == IDXD_DEV_TYPE_DSA) {
+comp_rec_size = sizeof(struct dsa_hw_comp_record);
+if (_dsa_alloc_batches(chan, num_descriptors)) {
+goto error;
+}
+} else {
+comp_rec_size = sizeof(struct iaa_hw_comp_record);
+}
for (i = 0; i < num_descriptors; i++) {
STAILQ_INSERT_TAIL(&chan->ops_pool, op, link);
op->desc = desc;
-rc = _vtophys(&op->hw, &desc->completion_addr, sizeof(struct dsa_hw_comp_record));
+rc = _vtophys(&op->hw, &desc->completion_addr, comp_rec_size);
if (rc) {
SPDK_ERRLOG("Failed to translate completion memory\n");
-goto err_op;
+goto error;
}
op++;
desc++;
}
-if (_dsa_alloc_batches(chan, num_descriptors)) {
-return NULL;
-}
return chan;
-err_op:
+error:
spdk_free(chan->ops_base);
+chan->ops_base = NULL;
spdk_free(chan->desc_base);
+chan->desc_base = NULL;
-err_chan:
free(chan);
return NULL;
}
@@ -299,6 +308,7 @@ spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan)
struct idxd_batch *batch;
assert(chan != NULL);
assert(chan->idxd != NULL);
if (chan->batch) {
idxd_batch_cancel(chan, -ECANCELED);
@@ -338,17 +348,20 @@ idxd_get_impl_by_name(const char *impl_name)
void
spdk_idxd_set_config(bool kernel_mode)
{
-if (g_idxd_impl != NULL) {
+struct spdk_idxd_impl *tmp;
+if (kernel_mode) {
+tmp = idxd_get_impl_by_name(KERNEL_DRIVER_NAME);
+} else {
+tmp = idxd_get_impl_by_name(USERSPACE_DRIVER_NAME);
+}
+if (g_idxd_impl != NULL && g_idxd_impl != tmp) {
SPDK_ERRLOG("Cannot change idxd implementation after devices are initialized\n");
assert(false);
return;
}
-if (kernel_mode) {
-g_idxd_impl = idxd_get_impl_by_name(KERNEL_DRIVER_NAME);
-} else {
-g_idxd_impl = idxd_get_impl_by_name(USERSPACE_DRIVER_NAME);
-}
+g_idxd_impl = tmp;
if (g_idxd_impl == NULL) {
SPDK_ERRLOG("Cannot set the idxd implementation with %s mode\n",
@@ -1125,6 +1138,142 @@ error:
return rc;
}
static inline int
_idxd_submit_compress_single(struct spdk_idxd_io_channel *chan, void *dst, const void *src,
uint64_t nbytes_dst, uint64_t nbytes_src, uint32_t *output_size,
int flags, spdk_idxd_req_cb cb_fn, void *cb_arg)
{
struct idxd_hw_desc *desc;
struct idxd_ops *op;
uint64_t src_addr, dst_addr;
int rc;
/* Common prep. */
rc = _idxd_prep_command(chan, cb_fn, cb_arg, flags, &desc, &op);
if (rc) {
return rc;
}
rc = _vtophys(src, &src_addr, nbytes_src);
if (rc) {
goto error;
}
rc = _vtophys(dst, &dst_addr, nbytes_dst);
if (rc) {
goto error;
}
/* Command specific. */
desc->opcode = IDXD_OPCODE_COMPRESS;
desc->src1_addr = src_addr;
desc->dst_addr = dst_addr;
desc->src1_size = nbytes_src;
desc->iaa.max_dst_size = nbytes_dst;
desc->iaa.src2_size = sizeof(struct iaa_aecs);
desc->iaa.src2_addr = (uint64_t)chan->idxd->aecs;
desc->flags |= IAA_FLAG_RD_SRC2_AECS;
desc->compr_flags = IAA_COMP_FLAGS;
op->output_size = output_size;
_submit_to_hw(chan, op);
return 0;
error:
STAILQ_INSERT_TAIL(&chan->ops_pool, op, link);
return rc;
}
int
spdk_idxd_submit_compress(struct spdk_idxd_io_channel *chan,
struct iovec *diov, uint32_t diovcnt,
struct iovec *siov, uint32_t siovcnt, uint32_t *output_size,
int flags, spdk_idxd_req_cb cb_fn, void *cb_arg)
{
assert(chan != NULL);
assert(diov != NULL);
assert(siov != NULL);
if (diovcnt == 1 && siovcnt == 1) {
/* Simple case - single source buffer and single destination buffer. */
if (diov[0].iov_len < siov[0].iov_len) {
return -EINVAL;
}
return _idxd_submit_compress_single(chan, diov[0].iov_base, siov[0].iov_base,
diov[0].iov_len, siov[0].iov_len,
output_size, flags, cb_fn, cb_arg);
}
/* TODO: vectored support */
return -EINVAL;
}
static inline int
_idxd_submit_decompress_single(struct spdk_idxd_io_channel *chan, void *dst, const void *src,
uint64_t nbytes_dst, uint64_t nbytes, int flags, spdk_idxd_req_cb cb_fn, void *cb_arg)
{
struct idxd_hw_desc *desc;
struct idxd_ops *op;
uint64_t src_addr, dst_addr;
int rc;
/* Common prep. */
rc = _idxd_prep_command(chan, cb_fn, cb_arg, flags, &desc, &op);
if (rc) {
return rc;
}
rc = _vtophys(src, &src_addr, nbytes);
if (rc) {
goto error;
}
rc = _vtophys(dst, &dst_addr, nbytes_dst);
if (rc) {
goto error;
}
/* Command specific. */
desc->opcode = IDXD_OPCODE_DECOMPRESS;
desc->src1_addr = src_addr;
desc->dst_addr = dst_addr;
desc->src1_size = nbytes;
desc->iaa.max_dst_size = nbytes_dst;
desc->iaa.src2_size = sizeof(struct iaa_aecs);
desc->iaa.src2_addr = (uint64_t)chan->idxd->aecs;
desc->flags |= IAA_FLAG_RD_SRC2_AECS;
desc->compr_flags = IAA_COMP_FLAGS;
_submit_to_hw(chan, op);
return 0;
error:
STAILQ_INSERT_TAIL(&chan->ops_pool, op, link);
return rc;
}
int
spdk_idxd_submit_decompress(struct spdk_idxd_io_channel *chan,
struct iovec *diov, uint32_t diovcnt,
struct iovec *siov, uint32_t siovcnt,
int flags, spdk_idxd_req_cb cb_fn, void *cb_arg)
{
assert(chan != NULL);
assert(diov != NULL);
assert(siov != NULL);
if (diovcnt == 1 && siovcnt == 1) {
/* Simple case - single source buffer and single destination buffer. */
if (diov[0].iov_len < siov[0].iov_len) {
return -EINVAL;
}
return _idxd_submit_decompress_single(chan, diov[0].iov_base, siov[0].iov_base,
diov[0].iov_len, siov[0].iov_len,
flags, cb_fn, cb_arg);
}
/* TODO: vectored support */
return -EINVAL;
}
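Note the asymmetry with the compress path: there is no `output_size` parameter here, so the caller has to size `diov` from out-of-band knowledge of the original data size. A hypothetical companion to the `compress_one()` sketch shown earlier, reusing its `op_done()` callback:

/* Decompress comp_buf into out_buf, busy-polling as in compress_one().
 * out_len must bound the decompressed size; the operation fails if the
 * destination is too small. */
static int
decompress_one(struct spdk_idxd_io_channel *chan, void *comp_buf,
	       uint32_t comp_len, void *out_buf, uint32_t out_len)
{
	struct iovec siov = { .iov_base = comp_buf, .iov_len = comp_len };
	struct iovec diov = { .iov_base = out_buf, .iov_len = out_len };
	int result = INT_MIN;
	int rc;

	rc = spdk_idxd_submit_decompress(chan, &diov, 1, &siov, 1, 0,
					 op_done, &result);
	if (rc) {
		return rc;
	}
	while (result == INT_MIN) {
		spdk_idxd_process_events(chan);
	}
	return result;
}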
static inline void
_dump_sw_error_reg(struct spdk_idxd_io_channel *chan)
{
@@ -1162,7 +1311,9 @@ spdk_idxd_process_events(struct spdk_idxd_io_channel *chan)
STAILQ_REMOVE_HEAD(&chan->ops_outstanding, link);
rc++;
/* Status is in the same location for both IAA and DSA completion records. */
if (spdk_unlikely(IDXD_FAILURE(op->hw.status))) {
SPDK_ERRLOG("Completion status 0x%x\n", op->hw.status);
status = -EINVAL;
_dump_sw_error_reg(chan);
}
@@ -1183,6 +1334,11 @@ spdk_idxd_process_events(struct spdk_idxd_io_channel *chan)
status = op->hw.result;
}
break;
case IDXD_OPCODE_COMPRESS:
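/* Report the compressed byte count from the IAA completion record. */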
if (spdk_likely(status == 0 && op->output_size != NULL)) {
*op->output_size = op->iaa_hw.output_size;
}
break;
}
/* TODO: WHAT IF THIS FAILED!? */

View File

@@ -137,7 +137,10 @@ struct idxd_ops {
spdk_idxd_req_cb cb_fn;
struct idxd_batch *batch;
struct idxd_hw_desc *desc;
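/* An op produces either a CRC value or a compressed output size, never
 * both, so the result pointers can overlay. */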
union {
uint32_t *crc_dst;
uint32_t *output_size;
};
struct idxd_ops *parent;
uint32_t count;
STAILQ_ENTRY(idxd_ops) link;
@@ -164,6 +167,7 @@ struct spdk_idxd_device {
uint32_t chan_per_device;
pthread_mutex_t num_channels_lock;
enum idxd_dev type;
struct iaa_aecs *aecs;
};
void idxd_impl_register(struct spdk_idxd_impl *impl);

View File

@@ -139,6 +139,7 @@ idxd_disable_dev(struct spdk_user_idxd_device *user_idxd)
cmd.command_code = IDXD_DISABLE_DEV;
assert(&user_idxd->registers->cmd.raw); /* scan-build */
spdk_mmio_write_4(&user_idxd->registers->cmd.raw, cmd.raw);
rc = idxd_wait_cmd(user_idxd, IDXD_REGISTER_TIMEOUT_US);
if (rc < 0) {
@@ -354,6 +355,9 @@ user_idxd_device_destruct(struct spdk_idxd_device *idxd)
idxd_unmap_pci_bar(user_idxd, IDXD_WQ_BAR);
spdk_pci_device_detach(user_idxd->device);
if (idxd->type == IDXD_DEV_TYPE_IAA) {
spdk_free(idxd->aecs);
}
free(user_idxd);
}
@@ -466,6 +470,57 @@ static struct spdk_idxd_impl g_user_idxd_impl = {
.portal_get_addr = user_idxd_portal_get_addr
};
/*
* Fixed Huffman tables the IAA hardware requires to implement RFC-1951.
*/
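/* Each entry appears to encode (code length << 15) | Huffman code, following
 * the fixed literal/length and distance tables of RFC 1951 section 3.2.6
 * (e.g. 0x40030 is the 8-bit code 0x30 for literal 0). */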
const uint32_t fixed_ll_sym[286] = {
0x40030, 0x40031, 0x40032, 0x40033, 0x40034, 0x40035, 0x40036, 0x40037,
0x40038, 0x40039, 0x4003A, 0x4003B, 0x4003C, 0x4003D, 0x4003E, 0x4003F,
0x40040, 0x40041, 0x40042, 0x40043, 0x40044, 0x40045, 0x40046, 0x40047,
0x40048, 0x40049, 0x4004A, 0x4004B, 0x4004C, 0x4004D, 0x4004E, 0x4004F,
0x40050, 0x40051, 0x40052, 0x40053, 0x40054, 0x40055, 0x40056, 0x40057,
0x40058, 0x40059, 0x4005A, 0x4005B, 0x4005C, 0x4005D, 0x4005E, 0x4005F,
0x40060, 0x40061, 0x40062, 0x40063, 0x40064, 0x40065, 0x40066, 0x40067,
0x40068, 0x40069, 0x4006A, 0x4006B, 0x4006C, 0x4006D, 0x4006E, 0x4006F,
0x40070, 0x40071, 0x40072, 0x40073, 0x40074, 0x40075, 0x40076, 0x40077,
0x40078, 0x40079, 0x4007A, 0x4007B, 0x4007C, 0x4007D, 0x4007E, 0x4007F,
0x40080, 0x40081, 0x40082, 0x40083, 0x40084, 0x40085, 0x40086, 0x40087,
0x40088, 0x40089, 0x4008A, 0x4008B, 0x4008C, 0x4008D, 0x4008E, 0x4008F,
0x40090, 0x40091, 0x40092, 0x40093, 0x40094, 0x40095, 0x40096, 0x40097,
0x40098, 0x40099, 0x4009A, 0x4009B, 0x4009C, 0x4009D, 0x4009E, 0x4009F,
0x400A0, 0x400A1, 0x400A2, 0x400A3, 0x400A4, 0x400A5, 0x400A6, 0x400A7,
0x400A8, 0x400A9, 0x400AA, 0x400AB, 0x400AC, 0x400AD, 0x400AE, 0x400AF,
0x400B0, 0x400B1, 0x400B2, 0x400B3, 0x400B4, 0x400B5, 0x400B6, 0x400B7,
0x400B8, 0x400B9, 0x400BA, 0x400BB, 0x400BC, 0x400BD, 0x400BE, 0x400BF,
0x48190, 0x48191, 0x48192, 0x48193, 0x48194, 0x48195, 0x48196, 0x48197,
0x48198, 0x48199, 0x4819A, 0x4819B, 0x4819C, 0x4819D, 0x4819E, 0x4819F,
0x481A0, 0x481A1, 0x481A2, 0x481A3, 0x481A4, 0x481A5, 0x481A6, 0x481A7,
0x481A8, 0x481A9, 0x481AA, 0x481AB, 0x481AC, 0x481AD, 0x481AE, 0x481AF,
0x481B0, 0x481B1, 0x481B2, 0x481B3, 0x481B4, 0x481B5, 0x481B6, 0x481B7,
0x481B8, 0x481B9, 0x481BA, 0x481BB, 0x481BC, 0x481BD, 0x481BE, 0x481BF,
0x481C0, 0x481C1, 0x481C2, 0x481C3, 0x481C4, 0x481C5, 0x481C6, 0x481C7,
0x481C8, 0x481C9, 0x481CA, 0x481CB, 0x481CC, 0x481CD, 0x481CE, 0x481CF,
0x481D0, 0x481D1, 0x481D2, 0x481D3, 0x481D4, 0x481D5, 0x481D6, 0x481D7,
0x481D8, 0x481D9, 0x481DA, 0x481DB, 0x481DC, 0x481DD, 0x481DE, 0x481DF,
0x481E0, 0x481E1, 0x481E2, 0x481E3, 0x481E4, 0x481E5, 0x481E6, 0x481E7,
0x481E8, 0x481E9, 0x481EA, 0x481EB, 0x481EC, 0x481ED, 0x481EE, 0x481EF,
0x481F0, 0x481F1, 0x481F2, 0x481F3, 0x481F4, 0x481F5, 0x481F6, 0x481F7,
0x481F8, 0x481F9, 0x481FA, 0x481FB, 0x481FC, 0x481FD, 0x481FE, 0x481FF,
0x38000, 0x38001, 0x38002, 0x38003, 0x38004, 0x38005, 0x38006, 0x38007,
0x38008, 0x38009, 0x3800A, 0x3800B, 0x3800C, 0x3800D, 0x3800E, 0x3800F,
0x38010, 0x38011, 0x38012, 0x38013, 0x38014, 0x38015, 0x38016, 0x38017,
0x400C0, 0x400C1, 0x400C2, 0x400C3, 0x400C4, 0x400C5
};
const uint32_t fixed_d_sym[30] = {
0x28000, 0x28001, 0x28002, 0x28003, 0x28004, 0x28005, 0x28006, 0x28007,
0x28008, 0x28009, 0x2800A, 0x2800B, 0x2800C, 0x2800D, 0x2800E, 0x2800F,
0x28010, 0x28011, 0x28012, 0x28013, 0x28014, 0x28015, 0x28016, 0x28017,
0x28018, 0x28019, 0x2801A, 0x2801B, 0x2801C, 0x2801D
};
#define DYNAMIC_HDR 0x2
#define DYNAMIC_HDR_SIZE 3
/* Caller must hold g_driver_lock */
static struct spdk_idxd_device *
idxd_attach(struct spdk_pci_device *device)
@@ -487,6 +542,20 @@ idxd_attach(struct spdk_pci_device *device)
idxd->type = IDXD_DEV_TYPE_DSA;
} else if (did == PCI_DEVICE_ID_INTEL_IAA) {
idxd->type = IDXD_DEV_TYPE_IAA;
idxd->aecs = spdk_zmalloc(sizeof(struct iaa_aecs),
0x20, NULL,
SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
if (idxd->aecs == NULL) {
SPDK_ERRLOG("Failed to allocate iaa aecs\n");
goto err;
}
/* Configure aecs table using fixed Huffman table */
idxd->aecs->output_accum[0] = DYNAMIC_HDR | 1;
idxd->aecs->num_output_accum_bits = DYNAMIC_HDR_SIZE;
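/* DYNAMIC_HDR | 1 = 0b011 seeds the 3-bit DEFLATE block header in the
 * output accumulator: BFINAL = 1 and BTYPE = 01 (fixed Huffman), matching
 * the tables copied in below. */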
/* Add Huffman table to aecs */
memcpy(idxd->aecs->ll_sym, fixed_ll_sym, sizeof(fixed_ll_sym));
memcpy(idxd->aecs->d_sym, fixed_d_sym, sizeof(fixed_d_sym));
}
user_idxd->device = device;

View File

@@ -22,6 +22,8 @@
spdk_idxd_submit_copy;
spdk_idxd_submit_dualcast;
spdk_idxd_submit_fill;
spdk_idxd_submit_compress;
spdk_idxd_submit_decompress;
spdk_idxd_process_events;
spdk_idxd_get_channel;
spdk_idxd_put_channel;