From 17bb748a60ac944ff9a13bb4eb0bc1cf8150d4a5 Mon Sep 17 00:00:00 2001 From: paul luse Date: Tue, 28 Jul 2020 13:51:20 -0400 Subject: [PATCH] accel: Move non-engine specific batch to the accel_fw layer The new design: * Supports a generic batching capability in the accel_fw layer that keeps track of hw accelerated vs sw commands based on the capabilities of the engine and processes sw commands in the generic layer while sending a list of commands (not a batch) to the engines for processing. * Batch completions are managed via the generic layer, when using the accel_fw the engines only process commands. With DSA however, if a list of commands is sent down it will use the DSA public API to create and send batches but will rely on the generic layer to complete the batch task itself. When using DSA directly, batching works as usual (DSA handles batch completion). * The engine function tables were greatly simplified by replacing all of the individual entries (copy, fill, crc32c, etc) with one `submit_tasks` function that is used to both send lists of tasks for batches or just one task for single shot API. * Internally batching is now used to re-submit tasks that were queued for flow control reasons. 
Signed-off-by: paul luse Change-Id: I99c28751df32017c43490a90f4904bdabe79a270 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3555 Tested-by: SPDK CI Jenkins Reviewed-by: Ben Walker Reviewed-by: Jim Harris Community-CI: Mellanox Build Bot --- include/spdk/accel_engine.h | 13 +- include/spdk_internal/accel_engine.h | 83 +- lib/accel/accel_engine.c | 1067 +++++++++++-------------- lib/accel/spdk_accel.map | 2 +- mk/spdk.lib_deps.mk | 2 +- module/accel/idxd/accel_engine_idxd.c | 626 +++++---------- module/accel/ioat/accel_engine_ioat.c | 359 +-------- 7 files changed, 723 insertions(+), 1429 deletions(-) diff --git a/include/spdk/accel_engine.h b/include/spdk/accel_engine.h index d682ad1c4..e8056d34e 100644 --- a/include/spdk/accel_engine.h +++ b/include/spdk/accel_engine.h @@ -49,9 +49,8 @@ enum accel_capability { ACCEL_FILL = 1 << 1, ACCEL_DUALCAST = 1 << 2, ACCEL_COMPARE = 1 << 3, - ACCEL_BATCH = 1 << 4, - ACCEL_CRC32C = 1 << 5, - ACCEL_DIF = 1 << 6, + ACCEL_CRC32C = 1 << 4, + ACCEL_DIF = 1 << 5, }; /** @@ -69,10 +68,6 @@ typedef void (*spdk_accel_completion_cb)(void *ref, int status); */ typedef void (*spdk_accel_fini_cb)(void *cb_arg); -struct spdk_io_channel; - -struct spdk_accel_batch; - /** * Initialize the acceleration engine. * @@ -103,11 +98,11 @@ void spdk_accel_engine_module_finish(void); struct spdk_io_channel *spdk_accel_engine_get_io_channel(void); /** - * Retrieve accel engine capabilities. + * Retrieve accel engine HW acceleration capabilities. * * \param ch I/O channel associated with this call. * - * \return bitmap of capabilities defined by enum accel_capability. + * \return bitmap of HW acceleration capabilities defined by enum accel_capability. 
*/ uint64_t spdk_accel_get_capabilities(struct spdk_io_channel *ch); diff --git a/include/spdk_internal/accel_engine.h b/include/spdk_internal/accel_engine.h index 9fa77f30b..1433ce0a8 100644 --- a/include/spdk_internal/accel_engine.h +++ b/include/spdk_internal/accel_engine.h @@ -39,44 +39,67 @@ #include "spdk/accel_engine.h" #include "spdk/queue.h" -struct spdk_accel_task { - spdk_accel_completion_cb cb; +struct spdk_accel_task; + +void spdk_accel_task_complete(struct spdk_accel_task *task, int status); + +struct accel_io_channel { + struct spdk_accel_engine *engine; + struct spdk_io_channel *engine_ch; + void *task_pool_base; + TAILQ_HEAD(, spdk_accel_task) task_pool; + void *batch_pool_base; + TAILQ_HEAD(, spdk_accel_batch) batch_pool; + TAILQ_HEAD(, spdk_accel_batch) batches; +}; + +struct spdk_accel_batch { + /* Lists of commands in the batch. */ + TAILQ_HEAD(, spdk_accel_task) hw_tasks; + TAILQ_HEAD(, spdk_accel_task) sw_tasks; + /* Specific to the batch task itself. */ + int status; + uint32_t count; + spdk_accel_completion_cb cb_fn; void *cb_arg; struct accel_io_channel *accel_ch; + TAILQ_ENTRY(spdk_accel_batch) link; +}; + +enum accel_opcode { + ACCEL_OPCODE_MEMMOVE = 0, + ACCEL_OPCODE_MEMFILL = 1, + ACCEL_OPCODE_COMPARE = 2, + ACCEL_OPCODE_BATCH = 3, + ACCEL_OPCODE_CRC32C = 4, + ACCEL_OPCODE_DUALCAST = 5, +}; + +struct spdk_accel_task { + struct accel_io_channel *accel_ch; + struct spdk_accel_batch *batch; + spdk_accel_completion_cb cb_fn; + void *cb_arg; + void *src; + union { + void *dst; + void *src2; + }; + void *dst2; + uint32_t seed; + uint64_t fill_pattern; + enum accel_opcode op_code; + uint64_t nbytes; TAILQ_ENTRY(spdk_accel_task) link; - uint8_t offload_ctx[0]; + uint8_t offload_ctx[0]; /* Not currently used. 
*/ }; struct spdk_accel_engine { + uint64_t capabilities; uint64_t (*get_capabilities)(void); - int (*copy)(struct spdk_io_channel *ch, void *dst, void *src, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); - int (*dualcast)(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); - uint32_t (*batch_get_max)(void); - struct spdk_accel_batch *(*batch_create)(struct spdk_io_channel *ch); - int (*batch_prep_copy)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); - int (*batch_prep_dualcast)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - void *dst1, void *dst2, void *src, uint64_t nbytes, - spdk_accel_completion_cb cb_fn, void *cb_arg); - int (*batch_prep_compare)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); - int (*batch_prep_fill)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - void *dst, uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); - int (*batch_prep_crc32c)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes, - spdk_accel_completion_cb cb_fn, void *cb_arg); - int (*batch_submit)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - spdk_accel_completion_cb cb_fn, void *cb_arg); - int (*batch_cancel)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch); - int (*compare)(struct spdk_io_channel *ch, void *src1, void *src2, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); - int (*fill)(struct spdk_io_channel *ch, void *dst, uint8_t fill, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); - int (*crc32c)(struct spdk_io_channel *ch, uint32_t *dst, void *src, - uint32_t seed, uint64_t nbytes, 
spdk_accel_completion_cb cb_fn, void *cb_arg); struct spdk_io_channel *(*get_io_channel)(void); + uint32_t (*batch_get_max)(struct spdk_io_channel *ch); + int (*submit_tasks)(struct spdk_io_channel *ch, struct spdk_accel_task *accel_task); }; struct spdk_accel_module_if { diff --git a/lib/accel/accel_engine.c b/lib/accel/accel_engine.c index ce260ccbf..b65528984 100644 --- a/lib/accel/accel_engine.c +++ b/lib/accel/accel_engine.c @@ -40,16 +40,19 @@ #include "spdk/thread.h" #include "spdk/json.h" #include "spdk/crc32.h" +#include "spdk/util.h" /* Accelerator Engine Framework: The following provides a top level * generic API for the accelerator functions defined here. Modules, - * such as the one in /module/accel/ioat, supply the implemention of + * such as the one in /module/accel/ioat, supply the implemention * with the exception of the pure software implemention contained * later in this file. */ -#define ALIGN_4K 0x1000 -#define MAX_TASKS_PER_CHANNEL 0x400 +#define ALIGN_4K 0x1000 +#define MAX_TASKS_PER_CHANNEL 0x800 +#define MAX_BATCH_SIZE 0x80 +#define MAX_NUM_BATCHES_PER_CHANNEL (MAX_TASKS_PER_CHANNEL / MAX_BATCH_SIZE) /* Largest context size for all accel modules */ static size_t g_max_accel_module_size = 0; @@ -64,27 +67,11 @@ static void *g_fini_cb_arg = NULL; static TAILQ_HEAD(, spdk_accel_module_if) spdk_accel_module_list = TAILQ_HEAD_INITIALIZER(spdk_accel_module_list); -struct accel_io_channel { - struct spdk_accel_engine *engine; - struct spdk_io_channel *ch; - void *task_pool_base; - TAILQ_HEAD(, spdk_accel_task) task_pool; -}; - -/* Forward declarations of software implementations used when an - * engine has not implemented the capability. 
- */ -static int sw_accel_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); -static int sw_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); -static int sw_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); -static int sw_accel_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg); -static int sw_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, - uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, - void *cb_arg); +static void _sw_accel_dualcast(void *dst1, void *dst2, void *src, uint64_t nbytes); +static void _sw_accel_copy(void *dst, void *src, uint64_t nbytes); +static int _sw_accel_compare(void *src1, void *src2, uint64_t nbytes); +static void _sw_accel_fill(void *dst, uint8_t fill, uint64_t nbytes); +static void _sw_accel_crc32c(uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes); /* Registration of hw modules (currently supports only 1 at a time) */ void @@ -111,38 +98,82 @@ accel_sw_unregister(void) g_sw_accel_engine = NULL; } -/* Common completion routine, called only by the accel framework */ -static void -_accel_engine_done(void *ref, int status) +/* Used to determine whether a command is sent to an engine/module or done here + * via SW implementation. 
+ */ +inline static bool +_is_supported(struct spdk_accel_engine *engine, enum accel_capability operation) { - struct spdk_accel_task *accel_task = (struct spdk_accel_task *)ref; - - accel_task->cb(accel_task->cb_arg, status); - TAILQ_INSERT_TAIL(&accel_task->accel_ch->task_pool, accel_task, link); + return ((engine->capabilities & operation) == operation); } +void +spdk_accel_task_complete(struct spdk_accel_task *accel_task, int status) +{ + struct accel_io_channel *accel_ch = accel_task->accel_ch; + struct spdk_accel_batch *batch; + + accel_task->cb_fn(accel_task->cb_arg, status); + + /* If this task is part of a batch, check for completion of the batch. */ + if (accel_task->batch) { + batch = accel_task->batch; + assert(batch->count > 0); + batch->count--; + if (batch->count == 0) { + SPDK_DEBUGLOG(accel, "Batch %p count %d\n", batch, batch->count); + if (batch->cb_fn) { + batch->cb_fn(batch->cb_arg, batch->status); + } + TAILQ_REMOVE(&accel_ch->batches, batch, link); + TAILQ_INSERT_TAIL(&accel_ch->batch_pool, batch, link); + } + } + + TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link); +} + +/* Accel framework public API for discovering current engine capabilities. */ uint64_t spdk_accel_get_capabilities(struct spdk_io_channel *ch) { struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); - /* All engines are required to implement this API. 
*/ - return accel_ch->engine->get_capabilities(); + return accel_ch->engine->capabilities; +} + +inline static bool +_is_batch_valid(struct spdk_accel_batch *batch, struct accel_io_channel *accel_ch) +{ + return (batch->accel_ch == accel_ch); } inline static struct spdk_accel_task * -_get_task(struct accel_io_channel *accel_ch, spdk_accel_completion_cb cb_fn, void *cb_arg) +_get_task(struct accel_io_channel *accel_ch, struct spdk_accel_batch *batch, + spdk_accel_completion_cb cb_fn, void *cb_arg) { - struct spdk_accel_task *accel_task = TAILQ_FIRST(&accel_ch->task_pool); + struct spdk_accel_task *accel_task; + if (batch && _is_batch_valid(batch, accel_ch) == false) { + SPDK_ERRLOG("Attempt to access an invalid batch.\n"); + return NULL; + } + + accel_task = TAILQ_FIRST(&accel_ch->task_pool); if (accel_task == NULL) { return NULL; } TAILQ_REMOVE(&accel_ch->task_pool, accel_task, link); + accel_task->link.tqe_next = NULL; + accel_task->link.tqe_prev = NULL; - accel_task->cb = cb_fn; + accel_task->cb_fn = cb_fn; accel_task->cb_arg = cb_arg; accel_task->accel_ch = accel_ch; + accel_task->batch = batch; + if (batch) { + batch->count++; + } return accel_task; } @@ -155,18 +186,22 @@ spdk_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_ struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); struct spdk_accel_task *accel_task; - accel_task = _get_task(accel_ch, cb_fn, cb_arg); + accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg); if (accel_task == NULL) { return -ENOMEM; } - /* If the engine does not support it, fallback to the sw implementation. 
*/ - if (accel_ch->engine->copy) { - return accel_ch->engine->copy(accel_ch->ch, dst, src, nbytes, - _accel_engine_done, accel_task->offload_ctx); + accel_task->dst = dst; + accel_task->src = src; + accel_task->op_code = ACCEL_OPCODE_MEMMOVE; + accel_task->nbytes = nbytes; + + if (_is_supported(accel_ch->engine, ACCEL_COPY)) { + return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); } else { - return sw_accel_submit_copy(accel_ch->ch, dst, src, nbytes, - _accel_engine_done, accel_task->offload_ctx); + _sw_accel_copy(dst, src, nbytes); + spdk_accel_task_complete(accel_task, 0); + return 0; } } @@ -183,176 +218,26 @@ spdk_accel_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, v return -EINVAL; } - accel_task = _get_task(accel_ch, cb_fn, cb_arg); + accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg); if (accel_task == NULL) { return -ENOMEM; } - /* If the engine does not support it, fallback to the sw implementation. */ - if (accel_ch->engine->dualcast) { - return accel_ch->engine->dualcast(accel_ch->ch, dst1, dst2, src, nbytes, - _accel_engine_done, accel_task->offload_ctx); + accel_task->src = src; + accel_task->dst = dst1; + accel_task->dst2 = dst2; + accel_task->nbytes = nbytes; + accel_task->op_code = ACCEL_OPCODE_DUALCAST; + + if (_is_supported(accel_ch->engine, ACCEL_DUALCAST)) { + return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); } else { - return sw_accel_submit_dualcast(accel_ch->ch, dst1, dst2, src, nbytes, - _accel_engine_done, accel_task->offload_ctx); + _sw_accel_dualcast(dst1, dst2, src, nbytes); + spdk_accel_task_complete(accel_task, 0); + return 0; } } -/* Accel framework public API for batch_create function. All engines are - * required to implement this API. 
- */ -struct spdk_accel_batch * -spdk_accel_batch_create(struct spdk_io_channel *ch) -{ - struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); - - return accel_ch->engine->batch_create(accel_ch->ch); -} - -/* Accel framework public API for batch_submit function. All engines are - * required to implement this API. - */ -int -spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); - struct spdk_accel_task *accel_task; - - accel_task = _get_task(accel_ch, cb_fn, cb_arg); - if (accel_task == NULL) { - return -ENOMEM; - } - - return accel_ch->engine->batch_submit(accel_ch->ch, batch, _accel_engine_done, - accel_task->offload_ctx); -} - -/* Accel framework public API for getting max batch. All engines are - * required to implement this API. - */ -uint32_t -spdk_accel_batch_get_max(struct spdk_io_channel *ch) -{ - struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); - - return accel_ch->engine->batch_get_max(); -} - -/* Accel framework public API for for when an app is unable to complete a batch sequence, - * it cancels with this API. - */ -int -spdk_accel_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *batch) -{ - struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); - - return accel_ch->engine->batch_cancel(accel_ch->ch, batch); -} - -/* Accel framework public API for batch prep_copy function. All engines are - * required to implement this API. 
- */ -int -spdk_accel_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst, - void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); - struct spdk_accel_task *accel_task; - - accel_task = _get_task(accel_ch, cb_fn, cb_arg); - if (accel_task == NULL) { - return -ENOMEM; - } - - return accel_ch->engine->batch_prep_copy(accel_ch->ch, batch, dst, src, nbytes, - _accel_engine_done, accel_task->offload_ctx); -} - -/* Accel framework public API for batch prep_dualcast function. All engines are - * required to implement this API. - */ -int -spdk_accel_batch_prep_dualcast(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - void *dst1, void *dst2, void *src, uint64_t nbytes, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); - struct spdk_accel_task *accel_task; - - if ((uintptr_t)dst1 & (ALIGN_4K - 1) || (uintptr_t)dst2 & (ALIGN_4K - 1)) { - SPDK_ERRLOG("Dualcast requires 4K alignment on dst addresses\n"); - return -EINVAL; - } - - accel_task = _get_task(accel_ch, cb_fn, cb_arg); - if (accel_task == NULL) { - return -ENOMEM; - } - - return accel_ch->engine->batch_prep_dualcast(accel_ch->ch, batch, dst1, dst2, src, - nbytes, _accel_engine_done, accel_task->offload_ctx); -} - -/* Accel framework public API for batch prep_compare function. All engines are - * required to implement this API. 
- */ -int -spdk_accel_batch_prep_compare(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, - void *cb_arg) -{ - struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); - struct spdk_accel_task *accel_task; - - accel_task = _get_task(accel_ch, cb_fn, cb_arg); - if (accel_task == NULL) { - return -ENOMEM; - } - - return accel_ch->engine->batch_prep_compare(accel_ch->ch, batch, src1, src2, nbytes, - _accel_engine_done, accel_task->offload_ctx); -} - -/* Accel framework public API for batch prep_fill function. All engines are - * required to implement this API. - */ -int -spdk_accel_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst, - uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); - struct spdk_accel_task *accel_task; - - accel_task = _get_task(accel_ch, cb_fn, cb_arg); - if (accel_task == NULL) { - return -ENOMEM; - } - - return accel_ch->engine->batch_prep_fill(accel_ch->ch, batch, dst, fill, nbytes, - _accel_engine_done, accel_task->offload_ctx); -} - -/* Accel framework public API for batch prep_crc32c function. All engines are - * required to implement this API. 
- */ -int -spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); - struct spdk_accel_task *accel_task; - - accel_task = _get_task(accel_ch, cb_fn, cb_arg); - if (accel_task == NULL) { - return -ENOMEM; - } - - return accel_ch->engine->batch_prep_crc32c(accel_ch->ch, batch, dst, src, seed, nbytes, - _accel_engine_done, accel_task->offload_ctx); -} - /* Accel framework public API for compare function */ int spdk_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2, uint64_t nbytes, @@ -360,19 +245,24 @@ spdk_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2, ui { struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); struct spdk_accel_task *accel_task; + int rc; - accel_task = _get_task(accel_ch, cb_fn, cb_arg); + accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg); if (accel_task == NULL) { return -ENOMEM; } - /* If the engine does not support it, fallback to the sw implementation. 
*/ - if (accel_ch->engine->compare) { - return accel_ch->engine->compare(accel_ch->ch, src1, src2, nbytes, - _accel_engine_done, accel_task->offload_ctx); + accel_task->src = src1; + accel_task->src2 = src2; + accel_task->nbytes = nbytes; + accel_task->op_code = ACCEL_OPCODE_COMPARE; + + if (_is_supported(accel_ch->engine, ACCEL_COMPARE)) { + return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); } else { - return sw_accel_submit_compare(accel_ch->ch, src1, src2, nbytes, - _accel_engine_done, accel_task->offload_ctx); + rc = _sw_accel_compare(src1, src2, nbytes); + spdk_accel_task_complete(accel_task, rc); + return 0; } } @@ -384,18 +274,22 @@ spdk_accel_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill, uint struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); struct spdk_accel_task *accel_task; - accel_task = _get_task(accel_ch, cb_fn, cb_arg); + accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg); if (accel_task == NULL) { return -ENOMEM; } - /* If the engine does not support it, fallback to the sw implementation. 
*/ - if (accel_ch->engine->fill) { - return accel_ch->engine->fill(accel_ch->ch, dst, fill, nbytes, - _accel_engine_done, accel_task->offload_ctx); + accel_task->dst = dst; + accel_task->fill_pattern = fill; + accel_task->nbytes = nbytes; + accel_task->op_code = ACCEL_OPCODE_MEMFILL; + + if (_is_supported(accel_ch->engine, ACCEL_FILL)) { + return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); } else { - return sw_accel_submit_fill(accel_ch->ch, dst, fill, nbytes, - _accel_engine_done, accel_task->offload_ctx); + _sw_accel_fill(dst, fill, nbytes); + spdk_accel_task_complete(accel_task, 0); + return 0; } } @@ -407,21 +301,304 @@ spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, u struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); struct spdk_accel_task *accel_task; - accel_task = _get_task(accel_ch, cb_fn, cb_arg); + accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg); if (accel_task == NULL) { return -ENOMEM; } - /* If the engine does not support it, fallback to the sw implementation. */ - if (accel_ch->engine->crc32c) { - return accel_ch->engine->crc32c(accel_ch->ch, dst, src, seed, nbytes, - _accel_engine_done, accel_task->offload_ctx); + accel_task->dst = (void *)dst; + accel_task->src = src; + accel_task->seed = seed; + accel_task->nbytes = nbytes; + accel_task->op_code = ACCEL_OPCODE_CRC32C; + + if (_is_supported(accel_ch->engine, ACCEL_CRC32C)) { + return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); } else { - return sw_accel_submit_crc32c(accel_ch->ch, dst, src, seed, nbytes, - _accel_engine_done, accel_task->offload_ctx); + _sw_accel_crc32c(dst, src, seed, nbytes); + spdk_accel_task_complete(accel_task, 0); + return 0; } } +/* Accel framework public API for getting max operations for a batch. 
*/ +uint32_t +spdk_accel_batch_get_max(struct spdk_io_channel *ch) +{ + struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); + + /* Use the smaller of the currently selected engine or pure SW implementation. */ + return spdk_min(accel_ch->engine->batch_get_max(accel_ch->engine_ch), + MAX_BATCH_SIZE); +} + +int +spdk_accel_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst, + void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) +{ + struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); + struct spdk_accel_task *accel_task; + + accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg); + if (accel_task == NULL) { + return -ENOMEM; + } + + accel_task->src = src; + accel_task->dst = dst; + accel_task->nbytes = nbytes; + accel_task->op_code = ACCEL_OPCODE_MEMMOVE; + + if (_is_supported(accel_ch->engine, ACCEL_COPY)) { + TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link); + } else { + TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link); + } + + return 0; +} + +int +spdk_accel_batch_prep_dualcast(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *dst1, void *dst2, void *src, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg) +{ + struct spdk_accel_task *accel_task; + struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); + + if ((uintptr_t)dst1 & (ALIGN_4K - 1) || (uintptr_t)dst2 & (ALIGN_4K - 1)) { + SPDK_ERRLOG("Dualcast requires 4K alignment on dst addresses\n"); + return -EINVAL; + } + + accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg); + if (accel_task == NULL) { + return -ENOMEM; + } + + accel_task->src = src; + accel_task->dst = dst1; + accel_task->dst2 = dst2; + accel_task->nbytes = nbytes; + accel_task->op_code = ACCEL_OPCODE_DUALCAST; + + if (_is_supported(accel_ch->engine, ACCEL_DUALCAST)) { + TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link); + } else { + TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link); + } + + return 0; +} 
+ +int +spdk_accel_batch_prep_compare(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, + void *cb_arg) +{ + struct spdk_accel_task *accel_task; + struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); + + accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg); + if (accel_task == NULL) { + return -ENOMEM; + } + + accel_task->src = src1; + accel_task->src2 = src2; + accel_task->nbytes = nbytes; + accel_task->op_code = ACCEL_OPCODE_COMPARE; + + if (_is_supported(accel_ch->engine, ACCEL_COMPARE)) { + TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link); + } else { + TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link); + } + + return 0; +} + +int +spdk_accel_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst, + uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) +{ + struct spdk_accel_task *accel_task; + struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); + + accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg); + if (accel_task == NULL) { + return -ENOMEM; + } + + accel_task->dst = dst; + accel_task->fill_pattern = fill; + accel_task->nbytes = nbytes; + accel_task->op_code = ACCEL_OPCODE_MEMFILL; + + if (_is_supported(accel_ch->engine, ACCEL_FILL)) { + TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link); + } else { + TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link); + } + + return 0; +} + +int +spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes, + spdk_accel_completion_cb cb_fn, void *cb_arg) +{ + struct spdk_accel_task *accel_task; + struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); + + accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg); + if (accel_task == NULL) { + return -ENOMEM; + } + + accel_task->dst = dst; + accel_task->src = src; + accel_task->seed = seed; + 
accel_task->nbytes = nbytes; + accel_task->op_code = ACCEL_OPCODE_CRC32C; + + if (_is_supported(accel_ch->engine, ACCEL_CRC32C)) { + TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link); + } else { + TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link); + } + + return 0; +} + +/* Accel framework public API for batch_create function. */ +struct spdk_accel_batch * +spdk_accel_batch_create(struct spdk_io_channel *ch) +{ + struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); + struct spdk_accel_batch *batch; + + batch = TAILQ_FIRST(&accel_ch->batch_pool); + if (batch == NULL) { + /* The application needs to handle this case (no batches available) */ + return NULL; + } + + TAILQ_REMOVE(&accel_ch->batch_pool, batch, link); + TAILQ_INIT(&batch->hw_tasks); + TAILQ_INIT(&batch->sw_tasks); + batch->count = batch->status = 0; + batch->accel_ch = accel_ch; + TAILQ_INSERT_TAIL(&accel_ch->batches, batch, link); + SPDK_DEBUGLOG(accel, "Create batch %p\n", batch); + + return (struct spdk_accel_batch *)batch; +} + +/* Accel framework public API for batch_submit function. */ +int +spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, + spdk_accel_completion_cb cb_fn, void *cb_arg) +{ + struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); + struct spdk_accel_task *accel_task, *next_task; + int rc = 0; + + if (_is_batch_valid(batch, accel_ch) == false) { + SPDK_ERRLOG("Attempt to access an invalid batch.\n"); + return -EINVAL; + } + + batch->cb_fn = cb_fn; + batch->cb_arg = cb_arg; + + /* Process any HW commands. */ + if (!TAILQ_EMPTY(&batch->hw_tasks)) { + accel_task = TAILQ_FIRST(&batch->hw_tasks); + + /* Clear the hw_tasks list but leave the tasks linked. */ + TAILQ_INIT(&batch->hw_tasks); + + /* The submit_tasks function will always return success and use the + * task callbacks to report errors. + */ + accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task); + } + + /* Process any SW commands. 
*/ + accel_task = TAILQ_FIRST(&batch->sw_tasks); + + /* Clear the sw_tasks list but leave the tasks linked. */ + TAILQ_INIT(&batch->sw_tasks); + + while (accel_task) { + /* Grab the next task now before it's returned to the pool in the cb_fn. */ + next_task = TAILQ_NEXT(accel_task, link); + + switch (accel_task->op_code) { + case ACCEL_OPCODE_MEMMOVE: + _sw_accel_copy(accel_task->dst, accel_task->src, accel_task->nbytes); + spdk_accel_task_complete(accel_task, 0); + break; + case ACCEL_OPCODE_MEMFILL: + _sw_accel_fill(accel_task->dst, accel_task->fill_pattern, accel_task->nbytes); + spdk_accel_task_complete(accel_task, 0); + break; + case ACCEL_OPCODE_COMPARE: + rc = _sw_accel_compare(accel_task->src, accel_task->src2, accel_task->nbytes); + spdk_accel_task_complete(accel_task, rc); + batch->status |= rc; + break; + case ACCEL_OPCODE_CRC32C: + _sw_accel_crc32c(accel_task->dst, accel_task->src, accel_task->seed, + accel_task->nbytes); + spdk_accel_task_complete(accel_task, 0); + break; + case ACCEL_OPCODE_DUALCAST: + _sw_accel_dualcast(accel_task->dst, accel_task->dst2, accel_task->src, + accel_task->nbytes); + spdk_accel_task_complete(accel_task, 0); + break; + default: + assert(false); + break; + } + accel_task = next_task; + }; + + /* There are no submission errors possible at this point. Any possible errors will + * happen in the task cb_fn calls and OR'd into the batch->status. + */ + return 0; +} + +/* Accel framework public API for batch cancel function. If the engine does + * not support batching it is done here at the accel_fw level. + */ +int +spdk_accel_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *batch) +{ + struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch); + struct spdk_accel_task *accel_task; + + /* Cancel anything currently outstanding for this batch. 
*/ + while ((batch = TAILQ_FIRST(&accel_ch->batches))) { + TAILQ_REMOVE(&accel_ch->batches, batch, link); + while ((accel_task = TAILQ_FIRST(&batch->hw_tasks))) { + TAILQ_REMOVE(&batch->hw_tasks, accel_task, link); + TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link); + } + while ((accel_task = TAILQ_FIRST(&batch->sw_tasks))) { + TAILQ_REMOVE(&batch->sw_tasks, accel_task, link); + TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link); + } + TAILQ_INSERT_TAIL(&accel_ch->batch_pool, batch, link); + } + + return 0; +} + /* Helper function when when accel modules register with the framework. */ void spdk_accel_module_list_add(struct spdk_accel_module_if *accel_module) { @@ -438,6 +615,7 @@ accel_engine_create_cb(void *io_device, void *ctx_buf) struct accel_io_channel *accel_ch = ctx_buf; struct spdk_accel_task *accel_task; uint8_t *task_mem; + struct spdk_accel_batch *batch; int i; accel_ch->task_pool_base = calloc(MAX_TASKS_PER_CHANNEL, g_max_accel_module_size); @@ -453,18 +631,31 @@ accel_engine_create_cb(void *io_device, void *ctx_buf) task_mem += g_max_accel_module_size; } - if (g_hw_accel_engine != NULL) { - accel_ch->ch = g_hw_accel_engine->get_io_channel(); - if (accel_ch->ch != NULL) { - accel_ch->engine = g_hw_accel_engine; - return 0; - } + TAILQ_INIT(&accel_ch->batch_pool); + TAILQ_INIT(&accel_ch->batches); + accel_ch->batch_pool_base = calloc(MAX_NUM_BATCHES_PER_CHANNEL, sizeof(struct spdk_accel_batch)); + if (accel_ch->batch_pool_base == NULL) { + free(accel_ch->task_pool_base); + return -ENOMEM; } - /* No hw engine enabled, use sw. 
*/ - accel_ch->ch = g_sw_accel_engine->get_io_channel(); - assert(accel_ch->ch != NULL); - accel_ch->engine = g_sw_accel_engine; + batch = (struct spdk_accel_batch *)accel_ch->batch_pool_base; + for (i = 0 ; i < MAX_NUM_BATCHES_PER_CHANNEL; i++) { + TAILQ_INSERT_TAIL(&accel_ch->batch_pool, batch, link); + batch++; + } + + if (g_hw_accel_engine != NULL) { + accel_ch->engine_ch = g_hw_accel_engine->get_io_channel(); + accel_ch->engine = g_hw_accel_engine; + } else { + /* No hw engine enabled, use sw. */ + accel_ch->engine_ch = g_sw_accel_engine->get_io_channel(); + accel_ch->engine = g_sw_accel_engine; + } + assert(accel_ch->engine_ch != NULL); + accel_ch->engine->capabilities = accel_ch->engine->get_capabilities(); + return 0; } @@ -474,7 +665,8 @@ accel_engine_destroy_cb(void *io_device, void *ctx_buf) { struct accel_io_channel *accel_ch = ctx_buf; - spdk_put_io_channel(accel_ch->ch); + free(accel_ch->batch_pool_base); + spdk_put_io_channel(accel_ch->engine_ch); free(accel_ch->task_pool_base); } @@ -526,8 +718,8 @@ spdk_accel_write_config_json(struct spdk_json_write_ctx *w) struct spdk_accel_module_if *accel_engine_module; /* - * The accel engine has no config, there may be some in - * the modules though. + * The accel fw has no config, there may be some in + * the engines/modules though. 
*/ spdk_json_write_array_begin(w); TAILQ_FOREACH(accel_engine_module, &spdk_accel_module_list, tailq) { @@ -574,418 +766,67 @@ spdk_accel_engine_finish(spdk_accel_fini_cb cb_fn, void *cb_arg) /* * The SW Accelerator module is "built in" here (rest of file) */ - -#define SW_ACCEL_BATCH_SIZE 2048 - -enum sw_accel_opcode { - SW_ACCEL_OPCODE_MEMMOVE = 0, - SW_ACCEL_OPCODE_MEMFILL = 1, - SW_ACCEL_OPCODE_COMPARE = 2, - SW_ACCEL_OPCODE_CRC32C = 3, - SW_ACCEL_OPCODE_DUALCAST = 4, -}; - -struct sw_accel_op { - struct sw_accel_io_channel *sw_ch; - void *cb_arg; - spdk_accel_completion_cb cb_fn; - void *src; - union { - void *dst; - void *src2; - }; - void *dst2; - uint32_t seed; - uint64_t fill_pattern; - enum sw_accel_opcode op_code; - uint64_t nbytes; - TAILQ_ENTRY(sw_accel_op) link; -}; - -/* The sw accel engine only supports one outstanding batch at a time. */ -struct sw_accel_io_channel { - TAILQ_HEAD(, sw_accel_op) op_pool; - TAILQ_HEAD(, sw_accel_op) batch; -}; - static uint64_t sw_accel_get_capabilities(void) { - return ACCEL_COPY | ACCEL_FILL | ACCEL_CRC32C | ACCEL_COMPARE | - ACCEL_DUALCAST | ACCEL_BATCH; -} - -static uint32_t -sw_accel_batch_get_max(void) -{ - return SW_ACCEL_BATCH_SIZE; -} - -/* The sw engine plug-in does not ahve a public API, it is only callable - * from the accel fw and thus does not need to have its own struct definition - * of a batch, it just simply casts the address of the single supported batch - * as the struct spdk_accel_batch pointer. 
- */ -static struct spdk_accel_batch * -sw_accel_batch_start(struct spdk_io_channel *ch) -{ - struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch); - - if (!TAILQ_EMPTY(&sw_ch->batch)) { - SPDK_ERRLOG("SW accel engine only supports one batch at a time.\n"); - return NULL; - } - - return (struct spdk_accel_batch *)&sw_ch->batch; -} - -static struct sw_accel_op * -_prep_op(struct sw_accel_io_channel *sw_ch, struct spdk_accel_batch *batch, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct sw_accel_op *op; - - if ((struct spdk_accel_batch *)&sw_ch->batch != batch) { - SPDK_ERRLOG("Invalid batch\n"); - return NULL; - } - - if (!TAILQ_EMPTY(&sw_ch->op_pool)) { - op = TAILQ_FIRST(&sw_ch->op_pool); - TAILQ_REMOVE(&sw_ch->op_pool, op, link); - } else { - SPDK_ERRLOG("Ran out of operations for batch\n"); - return NULL; - } - - op->cb_arg = cb_arg; - op->cb_fn = cb_fn; - op->sw_ch = sw_ch; - - return op; -} - -static int -sw_accel_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct sw_accel_op *op; - struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch); - - op = _prep_op(sw_ch, batch, cb_fn, cb_arg); - if (op == NULL) { - return -EINVAL; - } - - /* Command specific. */ - op->src = src; - op->dst = dst; - op->nbytes = nbytes; - op->op_code = SW_ACCEL_OPCODE_MEMMOVE; - TAILQ_INSERT_TAIL(&sw_ch->batch, op, link); - + /* No HW acceleration capabilities. 
*/ return 0; } -static int -sw_accel_batch_prep_dualcast(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst1, - void *dst2, - void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) +static void +_sw_accel_dualcast(void *dst1, void *dst2, void *src, uint64_t nbytes) { - struct sw_accel_op *op; - struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch); - - op = _prep_op(sw_ch, batch, cb_fn, cb_arg); - if (op == NULL) { - return -EINVAL; - } - - /* Command specific. */ - op->src = src; - op->dst = dst1; - op->dst2 = dst2; - op->nbytes = nbytes; - op->op_code = SW_ACCEL_OPCODE_DUALCAST; - TAILQ_INSERT_TAIL(&sw_ch->batch, op, link); - - return 0; -} - -static int -sw_accel_batch_prep_compare(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *src1, - void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct sw_accel_op *op; - struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch); - - op = _prep_op(sw_ch, batch, cb_fn, cb_arg); - if (op == NULL) { - return -EINVAL; - } - - /* Command specific. */ - op->src = src1; - op->src2 = src2; - op->nbytes = nbytes; - op->op_code = SW_ACCEL_OPCODE_COMPARE; - TAILQ_INSERT_TAIL(&sw_ch->batch, op, link); - - return 0; -} - -static int -sw_accel_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst, - uint8_t fill, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct sw_accel_op *op; - struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch); - - op = _prep_op(sw_ch, batch, cb_fn, cb_arg); - if (op == NULL) { - return -EINVAL; - } - - /* Command specific. 
*/ - op->dst = dst; - op->fill_pattern = fill; - op->nbytes = nbytes; - op->op_code = SW_ACCEL_OPCODE_MEMFILL; - TAILQ_INSERT_TAIL(&sw_ch->batch, op, link); - - return 0; -} - -static int -sw_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - uint32_t *dst, - void *src, uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct sw_accel_op *op; - struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch); - - op = _prep_op(sw_ch, batch, cb_fn, cb_arg); - if (op == NULL) { - return -EINVAL; - } - - /* Command specific. */ - op->dst = (void *)dst; - op->src = src; - op->seed = seed; - op->nbytes = nbytes; - op->op_code = SW_ACCEL_OPCODE_CRC32C; - TAILQ_INSERT_TAIL(&sw_ch->batch, op, link); - - return 0; -} - - -static int -sw_accel_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *batch) -{ - struct sw_accel_op *op; - struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch); - - if ((struct spdk_accel_batch *)&sw_ch->batch != batch) { - SPDK_ERRLOG("Invalid batch\n"); - return -EINVAL; - } - - /* Cancel the batch items by moving them back to the op_pool. */ - while ((op = TAILQ_FIRST(&sw_ch->batch))) { - TAILQ_REMOVE(&sw_ch->batch, op, link); - TAILQ_INSERT_TAIL(&sw_ch->op_pool, op, link); - } - - return 0; -} - -static int -sw_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct sw_accel_op *op; - struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch); - struct spdk_accel_task *accel_task; - int batch_status = 0, cmd_status = 0; - - if ((struct spdk_accel_batch *)&sw_ch->batch != batch) { - SPDK_ERRLOG("Invalid batch\n"); - return -EINVAL; - } - - /* Complete the batch items. 
*/ - while ((op = TAILQ_FIRST(&sw_ch->batch))) { - TAILQ_REMOVE(&sw_ch->batch, op, link); - accel_task = (struct spdk_accel_task *)((uintptr_t)op->cb_arg - - offsetof(struct spdk_accel_task, offload_ctx)); - - switch (op->op_code) { - case SW_ACCEL_OPCODE_MEMMOVE: - memcpy(op->dst, op->src, op->nbytes); - break; - case SW_ACCEL_OPCODE_DUALCAST: - memcpy(op->dst, op->src, op->nbytes); - memcpy(op->dst2, op->src, op->nbytes); - break; - case SW_ACCEL_OPCODE_COMPARE: - cmd_status = memcmp(op->src, op->src2, op->nbytes); - break; - case SW_ACCEL_OPCODE_MEMFILL: - memset(op->dst, op->fill_pattern, op->nbytes); - break; - case SW_ACCEL_OPCODE_CRC32C: - *(uint32_t *)op->dst = spdk_crc32c_update(op->src, op->nbytes, ~op->seed); - break; - default: - assert(false); - break; - } - - batch_status |= cmd_status; - op->cb_fn(accel_task, cmd_status); - TAILQ_INSERT_TAIL(&sw_ch->op_pool, op, link); - } - - /* Now complete the batch request itself. */ - accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg - - offsetof(struct spdk_accel_task, offload_ctx)); - cb_fn(accel_task, batch_status); - - return 0; -} - -static int -sw_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct spdk_accel_task *accel_task; - - memcpy(dst, src, (size_t)nbytes); - - accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg - - offsetof(struct spdk_accel_task, offload_ctx)); - cb_fn(accel_task, 0); - return 0; -} - -static int -sw_accel_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, - void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct spdk_accel_task *accel_task; - memcpy(dst1, src, (size_t)nbytes); memcpy(dst2, src, (size_t)nbytes); +} - accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg - - offsetof(struct spdk_accel_task, offload_ctx)); - cb_fn(accel_task, 0); - return 0; +static void +_sw_accel_copy(void *dst, void *src, uint64_t nbytes) +{ 
+ memcpy(dst, src, (size_t)nbytes); } static int -sw_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) +_sw_accel_compare(void *src1, void *src2, uint64_t nbytes) { - struct spdk_accel_task *accel_task; - int result; - - result = memcmp(src1, src2, (size_t)nbytes); - - accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg - - offsetof(struct spdk_accel_task, offload_ctx)); - cb_fn(accel_task, result); - - return 0; + return memcmp(src1, src2, (size_t)nbytes); } -static int -sw_accel_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) +static void +_sw_accel_fill(void *dst, uint8_t fill, uint64_t nbytes) { - struct spdk_accel_task *accel_task; - memset(dst, fill, nbytes); - accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg - - offsetof(struct spdk_accel_task, offload_ctx)); - cb_fn(accel_task, 0); - - return 0; } -static int -sw_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, - uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) +static void +_sw_accel_crc32c(uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes) { - struct spdk_accel_task *accel_task; - *dst = spdk_crc32c_update(src, nbytes, ~seed); - accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg - - offsetof(struct spdk_accel_task, offload_ctx)); - cb_fn(accel_task, 0); - - return 0; } static struct spdk_io_channel *sw_accel_get_io_channel(void); +static uint32_t +sw_accel_batch_get_max(struct spdk_io_channel *ch) +{ + return MAX_BATCH_SIZE; +} + static struct spdk_accel_engine sw_accel_engine = { .get_capabilities = sw_accel_get_capabilities, - .copy = sw_accel_submit_copy, - .dualcast = sw_accel_submit_dualcast, - .batch_get_max = sw_accel_batch_get_max, - .batch_create = sw_accel_batch_start, - .batch_cancel = sw_accel_batch_cancel, - .batch_prep_copy = 
sw_accel_batch_prep_copy, - .batch_prep_dualcast = sw_accel_batch_prep_dualcast, - .batch_prep_compare = sw_accel_batch_prep_compare, - .batch_prep_fill = sw_accel_batch_prep_fill, - .batch_prep_crc32c = sw_accel_batch_prep_crc32c, - .batch_submit = sw_accel_batch_submit, - .compare = sw_accel_submit_compare, - .fill = sw_accel_submit_fill, - .crc32c = sw_accel_submit_crc32c, .get_io_channel = sw_accel_get_io_channel, + .batch_get_max = sw_accel_batch_get_max, }; static int sw_accel_create_cb(void *io_device, void *ctx_buf) { - struct sw_accel_io_channel *sw_ch = ctx_buf; - struct sw_accel_op *op; - int i; - - TAILQ_INIT(&sw_ch->batch); - - TAILQ_INIT(&sw_ch->op_pool); - for (i = 0 ; i < SW_ACCEL_BATCH_SIZE ; i++) { - op = calloc(1, sizeof(struct sw_accel_op)); - if (op == NULL) { - SPDK_ERRLOG("Failed to allocate operation for batch.\n"); - while ((op = TAILQ_FIRST(&sw_ch->op_pool))) { - TAILQ_REMOVE(&sw_ch->op_pool, op, link); - free(op); - } - return -ENOMEM; - } - TAILQ_INSERT_TAIL(&sw_ch->op_pool, op, link); - } - return 0; } static void sw_accel_destroy_cb(void *io_device, void *ctx_buf) { - struct sw_accel_io_channel *sw_ch = ctx_buf; - struct sw_accel_op *op; - - while ((op = TAILQ_FIRST(&sw_ch->op_pool))) { - TAILQ_REMOVE(&sw_ch->op_pool, op, link); - free(op); - } } static struct spdk_io_channel *sw_accel_get_io_channel(void) @@ -1004,7 +845,7 @@ sw_accel_engine_init(void) { accel_sw_register(&sw_accel_engine); spdk_io_device_register(&sw_accel_engine, sw_accel_create_cb, sw_accel_destroy_cb, - sizeof(struct sw_accel_io_channel), "sw_accel_engine"); + 0, "sw_accel_engine"); return 0; } @@ -1018,5 +859,7 @@ sw_accel_engine_fini(void *ctxt) spdk_accel_engine_module_finish(); } +SPDK_LOG_REGISTER_COMPONENT(accel) + SPDK_ACCEL_MODULE_REGISTER(sw_accel_engine_init, sw_accel_engine_fini, NULL, sw_accel_engine_get_ctx_size) diff --git a/lib/accel/spdk_accel.map b/lib/accel/spdk_accel.map index 20f86c56f..88ebd38b1 100644 --- a/lib/accel/spdk_accel.map +++ 
b/lib/accel/spdk_accel.map @@ -26,7 +26,7 @@ # functions needed by modules spdk_accel_hw_engine_register; spdk_accel_module_list_add; - + spdk_accel_task_complete; local: *; }; diff --git a/mk/spdk.lib_deps.mk b/mk/spdk.lib_deps.mk index 5ccc3b8a6..f16eec5d7 100644 --- a/mk/spdk.lib_deps.mk +++ b/mk/spdk.lib_deps.mk @@ -107,7 +107,7 @@ DEPDIRS-blobfs_bdev += event endif # module/accel -DEPDIRS-accel_ioat := log ioat thread $(JSON_LIBS) accel util +DEPDIRS-accel_ioat := log ioat thread $(JSON_LIBS) accel DEPDIRS-accel_idxd := log idxd thread $(JSON_LIBS) accel # module/env_dpdk diff --git a/module/accel/idxd/accel_engine_idxd.c b/module/accel/idxd/accel_engine_idxd.c index 14c839806..f6a31d78d 100644 --- a/module/accel/idxd/accel_engine_idxd.c +++ b/module/accel/idxd/accel_engine_idxd.c @@ -46,10 +46,9 @@ #include "spdk/util.h" #include "spdk/json.h" -#define ALIGN_4K 0x1000 - static bool g_idxd_enable = false; uint32_t g_config_number; +static uint32_t g_batch_max; enum channel_state { IDXD_CHANNEL_ACTIVE, @@ -73,35 +72,13 @@ struct idxd_device { static TAILQ_HEAD(, idxd_device) g_idxd_devices = TAILQ_HEAD_INITIALIZER(g_idxd_devices); static struct idxd_device *g_next_dev = NULL; -struct idxd_op { - struct spdk_idxd_io_channel *chan; - void *cb_arg; - spdk_idxd_req_cb cb_fn; - void *src; - union { - void *dst; - void *src2; - }; - void *dst2; - uint32_t seed; - uint64_t fill_pattern; - uint32_t op_code; - uint64_t nbytes; - struct idxd_batch *batch; - TAILQ_ENTRY(idxd_op) link; -}; - struct idxd_io_channel { struct spdk_idxd_io_channel *chan; struct spdk_idxd_device *idxd; struct idxd_device *dev; enum channel_state state; struct spdk_poller *poller; - TAILQ_HEAD(, idxd_op) queued_ops; -}; - -struct idxd_task { - spdk_accel_completion_cb cb; + TAILQ_HEAD(, spdk_accel_task) queued_tasks; }; pthread_mutex_t g_configuration_lock = PTHREAD_MUTEX_INITIALIZER; @@ -123,12 +100,174 @@ idxd_select_device(void) return g_next_dev; } +static void +idxd_done(void *cb_arg, int 
status) +{ + struct spdk_accel_task *accel_task = cb_arg; + + spdk_accel_task_complete(accel_task, status); +} + +static int +_process_single_task(struct spdk_io_channel *ch, struct spdk_accel_task *task) +{ + struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); + int rc = 0; + + switch (task->op_code) { + case ACCEL_OPCODE_MEMMOVE: + rc = spdk_idxd_submit_copy(chan->chan, task->dst, task->src, task->nbytes, idxd_done, task); + break; + case ACCEL_OPCODE_DUALCAST: + rc = spdk_idxd_submit_dualcast(chan->chan, task->dst, task->dst2, task->src, task->nbytes, + idxd_done, task); + break; + case ACCEL_OPCODE_COMPARE: + rc = spdk_idxd_submit_compare(chan->chan, task->src, task->src2, task->nbytes, idxd_done, task); + break; + case ACCEL_OPCODE_MEMFILL: + rc = spdk_idxd_submit_fill(chan->chan, task->dst, task->fill_pattern, task->nbytes, idxd_done, + task); + break; + case ACCEL_OPCODE_CRC32C: + rc = spdk_idxd_submit_crc32c(chan->chan, task->dst, task->src, task->seed, task->nbytes, idxd_done, + task); + break; + default: + assert(false); + rc = -EINVAL; + break; + } + + return rc; +} + +static int +idxd_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *first_task) +{ + struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); + struct spdk_accel_task *task, *tmp, *batch_task; + struct idxd_batch *idxd_batch; + TAILQ_HEAD(, spdk_accel_task) batch_tasks; + int rc = 0; + uint32_t task_count = 0; + + task = first_task; + + if (chan->state == IDXD_CHANNEL_PAUSED) { + goto queue_tasks; + } else if (chan->state == IDXD_CHANNEL_ERROR) { + while (task) { + tmp = TAILQ_NEXT(task, link); + spdk_accel_task_complete(task, -EINVAL); + task = tmp; + } + return 0; + } + + /* If this is just a single task handle it here. */ + if (!TAILQ_NEXT(task, link)) { + rc = _process_single_task(ch, task); + + if (rc == -EBUSY) { + goto queue_tasks; + } else if (rc) { + spdk_accel_task_complete(task, rc); + } + + return 0; + } + + /* More than one task, create IDXD batch(es). 
*/ + do { + idxd_batch = spdk_idxd_batch_create(chan->chan); + task_count = 0; + if (idxd_batch == NULL) { + /* Queue them all and try again later */ + goto queue_tasks; + } + + /* Keep track of each batch's tasks in case we need to cancel. */ + TAILQ_INIT(&batch_tasks); + do { + switch (task->op_code) { + case ACCEL_OPCODE_MEMMOVE: + rc = spdk_idxd_batch_prep_copy(chan->chan, idxd_batch, task->dst, task->src, task->nbytes, + idxd_done, task); + break; + case ACCEL_OPCODE_DUALCAST: + rc = spdk_idxd_batch_prep_dualcast(chan->chan, idxd_batch, task->dst, task->dst2, + task->src, task->nbytes, idxd_done, task); + break; + case ACCEL_OPCODE_COMPARE: + rc = spdk_idxd_batch_prep_compare(chan->chan, idxd_batch, task->src, task->src2, + task->nbytes, idxd_done, task); + break; + case ACCEL_OPCODE_MEMFILL: + rc = spdk_idxd_batch_prep_fill(chan->chan, idxd_batch, task->dst, task->fill_pattern, + task->nbytes, idxd_done, task); + break; + case ACCEL_OPCODE_CRC32C: + rc = spdk_idxd_batch_prep_crc32c(chan->chan, idxd_batch, task->dst, task->src, + task->seed, task->nbytes, idxd_done, task); + break; + default: + assert(false); + break; + } + + tmp = TAILQ_NEXT(task, link); + + if (rc == 0) { + TAILQ_INSERT_TAIL(&batch_tasks, task, link); + } else { + assert(rc != -EBUSY); + spdk_accel_task_complete(task, rc); + } + + task_count++; + task = tmp; + } while (task && task_count < g_batch_max); + + if (!TAILQ_EMPTY(&batch_tasks)) { + rc = spdk_idxd_batch_submit(chan->chan, idxd_batch, NULL, NULL); + + /* If we can't submit the batch, just destroy it and queue up all the operations + * from the latest batch and try again later. If this list was from an accel_fw batch, + * all of the batch info is still associated with the tasks that we're about to + * queue up so nothing is lost. 
+ */ + if (rc) { + spdk_idxd_batch_cancel(chan->chan, idxd_batch); + while (!TAILQ_EMPTY(&batch_tasks)) { + batch_task = TAILQ_FIRST(&batch_tasks); + TAILQ_REMOVE(&batch_tasks, batch_task, link); + TAILQ_INSERT_TAIL(&chan->queued_tasks, batch_task, link); + } + rc = 0; + } + } else { + /* the last batch task list was empty so all tasks had their cb_fn called. */ + rc = 0; + } + } while (task && rc == 0); + + return 0; + +queue_tasks: + while (task != NULL) { + tmp = TAILQ_NEXT(task, link); + TAILQ_INSERT_TAIL(&chan->queued_tasks, task, link); + task = tmp; + } + return 0; +} + static int idxd_poll(void *arg) { struct idxd_io_channel *chan = arg; - struct idxd_op *op = NULL; - int rc; + struct spdk_accel_task *task = NULL; spdk_idxd_process_events(chan->chan); @@ -137,45 +276,13 @@ idxd_poll(void *arg) return -1; } - while (!TAILQ_EMPTY(&chan->queued_ops)) { - op = TAILQ_FIRST(&chan->queued_ops); + /* Submit queued tasks */ + if (!TAILQ_EMPTY(&chan->queued_tasks)) { + task = TAILQ_FIRST(&chan->queued_tasks); - switch (op->op_code) { - case IDXD_OPCODE_MEMMOVE: - rc = spdk_idxd_submit_copy(op->chan, op->dst, op->src, op->nbytes, - op->cb_fn, op->cb_arg); - break; - case IDXD_OPCODE_DUALCAST: - rc = spdk_idxd_submit_dualcast(op->chan, op->dst, op->dst2, op->src, op->nbytes, - op->cb_fn, op->cb_arg); - break; - case IDXD_OPCODE_COMPARE: - rc = spdk_idxd_submit_compare(op->chan, op->src, op->src2, op->nbytes, - op->cb_fn, op->cb_arg); - break; - case IDXD_OPCODE_MEMFILL: - rc = spdk_idxd_submit_fill(op->chan, op->dst, op->fill_pattern, op->nbytes, - op->cb_fn, op->cb_arg); - break; - case IDXD_OPCODE_CRC32C_GEN: - rc = spdk_idxd_submit_crc32c(op->chan, op->dst, op->src, op->seed, op->nbytes, - op->cb_fn, op->cb_arg); - break; - case IDXD_OPCODE_BATCH: - rc = spdk_idxd_batch_submit(op->chan, op->batch, op->cb_fn, op->cb_arg); - break; - default: - /* Should never get here */ - assert(false); - break; - } - if (rc == 0) { - TAILQ_REMOVE(&chan->queued_ops, op, link); - 
free(op); - } else { - /* Busy, resubmit to try again later */ - break; - } + TAILQ_INIT(&chan->queued_tasks); + + idxd_submit_tasks(task->accel_ch->engine_ch, task); } return -1; @@ -184,403 +291,27 @@ idxd_poll(void *arg) static size_t accel_engine_idxd_get_ctx_size(void) { - return sizeof(struct idxd_task) + sizeof(struct spdk_accel_task); -} - -static void -idxd_done(void *cb_arg, int status) -{ - struct spdk_accel_task *accel_task; - struct idxd_task *idxd_task = cb_arg; - - accel_task = SPDK_CONTAINEROF(idxd_task, struct spdk_accel_task, - offload_ctx); - - idxd_task->cb(accel_task, status); -} - -static struct idxd_op * -_prep_queue_command(struct idxd_io_channel *chan, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_op *op_to_queue; - - op_to_queue = calloc(1, sizeof(struct idxd_op)); - if (op_to_queue == NULL) { - SPDK_ERRLOG("Failed to allocate operation for queueing\n"); - return NULL; - } - - op_to_queue->chan = chan->chan; - op_to_queue->cb_fn = cb_fn; - op_to_queue->cb_arg = cb_arg; - - return op_to_queue; -} - -static int -idxd_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_task *idxd_task = (struct idxd_task *)cb_arg; - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - int rc = 0; - - idxd_task->cb = cb_fn; - - if (chan->state == IDXD_CHANNEL_ACTIVE) { - rc = spdk_idxd_submit_copy(chan->chan, dst, src, nbytes, idxd_done, idxd_task); - } - - if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) { - struct idxd_op *op_to_queue; - - /* Commpom prep. */ - op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task); - if (op_to_queue == NULL) { - return -ENOMEM; - } - - /* Command specific. */ - op_to_queue->dst = dst; - op_to_queue->src = src; - op_to_queue->nbytes = nbytes; - op_to_queue->op_code = IDXD_OPCODE_MEMMOVE; - - /* Queue the operation. 
*/ - TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link); - return 0; - - } else if (chan->state == IDXD_CHANNEL_ERROR) { - return -EINVAL; - } - - return rc; -} - -static int -idxd_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_task *idxd_task = (struct idxd_task *)cb_arg; - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - int rc = 0; - - idxd_task->cb = cb_fn; - - if (chan->state == IDXD_CHANNEL_ACTIVE) { - rc = spdk_idxd_submit_dualcast(chan->chan, dst1, dst2, src, nbytes, idxd_done, idxd_task); - } - - if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) { - struct idxd_op *op_to_queue; - - /* Commpom prep. */ - op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task); - if (op_to_queue == NULL) { - return -ENOMEM; - } - - /* Command specific. */ - op_to_queue->dst = dst1; - op_to_queue->dst2 = dst2; - op_to_queue->src = src; - op_to_queue->nbytes = nbytes; - op_to_queue->op_code = IDXD_OPCODE_DUALCAST; - - /* Queue the operation. */ - TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link); - return 0; - - } else if (chan->state == IDXD_CHANNEL_ERROR) { - return -EINVAL; - } - - return rc; -} - -static int -idxd_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_task *idxd_task = (struct idxd_task *)cb_arg; - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - int rc = 0; - - idxd_task->cb = cb_fn; - - if (chan->state == IDXD_CHANNEL_ACTIVE) { - rc = spdk_idxd_submit_compare(chan->chan, src1, src2, nbytes, idxd_done, idxd_task); - } - - if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) { - struct idxd_op *op_to_queue; - - /* Commpom prep. */ - op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task); - if (op_to_queue == NULL) { - return -ENOMEM; - } - - /* Command specific. 
*/ - op_to_queue->src = src1; - op_to_queue->src2 = src2; - op_to_queue->nbytes = nbytes; - op_to_queue->op_code = IDXD_OPCODE_COMPARE; - - /* Queue the operation. */ - TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link); - return 0; - - } else if (chan->state == IDXD_CHANNEL_ERROR) { - return -EINVAL; - } - - return rc; -} - -static int -idxd_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_task *idxd_task = (struct idxd_task *)cb_arg; - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - int rc = 0; - uint64_t fill_pattern; - - idxd_task->cb = cb_fn; - memset(&fill_pattern, fill, sizeof(uint64_t)); - - if (chan->state == IDXD_CHANNEL_ACTIVE) { - rc = spdk_idxd_submit_fill(chan->chan, dst, fill_pattern, nbytes, idxd_done, idxd_task); - } - - if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) { - struct idxd_op *op_to_queue; - - /* Commpom prep. */ - op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task); - if (op_to_queue == NULL) { - return -ENOMEM; - } - - /* Command specific. */ - op_to_queue->dst = dst; - op_to_queue->fill_pattern = fill_pattern; - op_to_queue->nbytes = nbytes; - op_to_queue->op_code = IDXD_OPCODE_MEMFILL; - - /* Queue the operation. 
*/ - TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link); - return 0; - - } else if (chan->state == IDXD_CHANNEL_ERROR) { - return -EINVAL; - } - - return rc; -} - -static int -idxd_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, - uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_task *idxd_task = (struct idxd_task *)cb_arg; - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - int rc = 0; - - idxd_task->cb = cb_fn; - - if (chan->state == IDXD_CHANNEL_ACTIVE) { - rc = spdk_idxd_submit_crc32c(chan->chan, dst, src, seed, nbytes, idxd_done, idxd_task); - } - - if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) { - struct idxd_op *op_to_queue; - - /* Commpom prep. */ - op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task); - if (op_to_queue == NULL) { - return -ENOMEM; - } - - /* Command specific. */ - op_to_queue->dst = dst; - op_to_queue->src = src; - op_to_queue->seed = seed; - op_to_queue->nbytes = nbytes; - op_to_queue->op_code = IDXD_OPCODE_CRC32C_GEN; - - /* Queue the operation. 
*/ - TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link); - return 0; - - } else if (chan->state == IDXD_CHANNEL_ERROR) { - return -EINVAL; - } - - return rc; + return 0; } static uint64_t idxd_get_capabilities(void) { return ACCEL_COPY | ACCEL_FILL | ACCEL_CRC32C | ACCEL_COMPARE | - ACCEL_DUALCAST | ACCEL_BATCH; + ACCEL_DUALCAST; } static uint32_t -idxd_batch_get_max(void) +idxd_batch_get_max(struct spdk_io_channel *ch) { return spdk_idxd_batch_get_max(); } -static struct spdk_accel_batch * -idxd_batch_start(struct spdk_io_channel *ch) -{ - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - - return (struct spdk_accel_batch *)spdk_idxd_batch_create(chan->chan); -} - -static int -idxd_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch) -{ - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - struct idxd_batch *batch = (struct idxd_batch *)_batch; - - return spdk_idxd_batch_cancel(chan->chan, batch); -} - -static int -idxd_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_task *idxd_task = (struct idxd_task *)cb_arg; - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - struct idxd_batch *batch = (struct idxd_batch *)_batch; - int rc = 0; - - idxd_task->cb = cb_fn; - - if (chan->state == IDXD_CHANNEL_ACTIVE) { - rc = spdk_idxd_batch_submit(chan->chan, batch, idxd_done, idxd_task); - } - - if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) { - struct idxd_op *op_to_queue; - - /* Commpom prep. */ - op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task); - if (op_to_queue == NULL) { - return -ENOMEM; - } - - /* Command specific. */ - op_to_queue->batch = batch; - op_to_queue->op_code = IDXD_OPCODE_BATCH; - - /* Queue the operation. 
*/ - TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link); - return 0; - - } else if (chan->state == IDXD_CHANNEL_ERROR) { - return -EINVAL; - } - - return rc; -} - -static int -idxd_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch, - void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_task *idxd_task = (struct idxd_task *)cb_arg; - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - struct idxd_batch *batch = (struct idxd_batch *)_batch; - - idxd_task->cb = cb_fn; - - return spdk_idxd_batch_prep_copy(chan->chan, batch, dst, src, nbytes, - idxd_done, idxd_task); -} - -static int -idxd_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch, - void *dst, uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_task *idxd_task = (struct idxd_task *)cb_arg; - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - uint64_t fill_pattern; - struct idxd_batch *batch = (struct idxd_batch *)_batch; - - idxd_task->cb = cb_fn; - memset(&fill_pattern, fill, sizeof(uint64_t)); - - return spdk_idxd_batch_prep_fill(chan->chan, batch, dst, fill_pattern, nbytes, idxd_done, - idxd_task); -} - -static int -idxd_batch_prep_dualcast(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch, - void *dst1, void *dst2, void *src, uint64_t nbytes, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_task *idxd_task = (struct idxd_task *)cb_arg; - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - struct idxd_batch *batch = (struct idxd_batch *)_batch; - - idxd_task->cb = cb_fn; - - return spdk_idxd_batch_prep_dualcast(chan->chan, batch, dst1, dst2, src, nbytes, idxd_done, - idxd_task); -} - -static int -idxd_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch, - uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_task 
*idxd_task = (struct idxd_task *)cb_arg; - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - struct idxd_batch *batch = (struct idxd_batch *)_batch; - - idxd_task->cb = cb_fn; - - return spdk_idxd_batch_prep_crc32c(chan->chan, batch, dst, src, seed, nbytes, idxd_done, - idxd_task); -} - -static int -idxd_batch_prep_compare(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch, - void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct idxd_task *idxd_task = (struct idxd_task *)cb_arg; - struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch); - struct idxd_batch *batch = (struct idxd_batch *)_batch; - - idxd_task->cb = cb_fn; - - return spdk_idxd_batch_prep_compare(chan->chan, batch, src1, src2, nbytes, idxd_done, - idxd_task); -} - static struct spdk_accel_engine idxd_accel_engine = { .get_capabilities = idxd_get_capabilities, - .copy = idxd_submit_copy, - .batch_get_max = idxd_batch_get_max, - .batch_create = idxd_batch_start, - .batch_cancel = idxd_batch_cancel, - .batch_prep_copy = idxd_batch_prep_copy, - .batch_prep_fill = idxd_batch_prep_fill, - .batch_prep_dualcast = idxd_batch_prep_dualcast, - .batch_prep_crc32c = idxd_batch_prep_crc32c, - .batch_prep_compare = idxd_batch_prep_compare, - .batch_submit = idxd_batch_submit, - .dualcast = idxd_submit_dualcast, - .compare = idxd_submit_compare, - .fill = idxd_submit_fill, - .crc32c = idxd_submit_crc32c, .get_io_channel = idxd_get_io_channel, + .batch_get_max = idxd_batch_get_max, + .submit_tasks = idxd_submit_tasks, }; /* @@ -652,7 +383,7 @@ idxd_create_cb(void *io_device, void *ctx_buf) chan->dev = dev; chan->poller = spdk_poller_register(idxd_poll, chan, 0); - TAILQ_INIT(&chan->queued_ops); + TAILQ_INIT(&chan->queued_tasks); /* * Configure the channel but leave paused until all others @@ -792,6 +523,7 @@ accel_engine_idxd_init(void) } g_idxd_initialized = true; + g_batch_max = spdk_idxd_batch_get_max(); SPDK_NOTICELOG("Accel engine updated to use 
IDXD DSA engine.\n"); spdk_accel_hw_engine_register(&idxd_accel_engine); spdk_io_device_register(&idxd_accel_engine, idxd_create_cb, idxd_destroy_cb, diff --git a/module/accel/ioat/accel_engine_ioat.c b/module/accel/ioat/accel_engine_ioat.c index ba00ba3b8..a6c82c6f8 100644 --- a/module/accel/ioat/accel_engine_ioat.c +++ b/module/accel/ioat/accel_engine_ioat.c @@ -42,36 +42,7 @@ #include "spdk/event.h" #include "spdk/thread.h" #include "spdk/ioat.h" -#include "spdk/crc32.h" -#define ALIGN_4K 0x1000 - -enum ioat_accel_opcode { - IOAT_ACCEL_OPCODE_MEMMOVE = 0, - IOAT_ACCEL_OPCODE_MEMFILL = 1, - IOAT_ACCEL_OPCODE_COMPARE = 2, - IOAT_ACCEL_OPCODE_CRC32C = 3, - IOAT_ACCEL_OPCODE_DUALCAST = 4, -}; - -struct ioat_accel_op { - struct ioat_io_channel *ioat_ch; - void *cb_arg; - spdk_accel_completion_cb cb_fn; - void *src; - union { - void *dst; - void *src2; - }; - void *dst2; - uint32_t seed; - uint64_t fill_pattern; - enum ioat_accel_opcode op_code; - uint64_t nbytes; - TAILQ_ENTRY(ioat_accel_op) link; -}; - -static int g_batch_size; static bool g_ioat_enable = false; static bool g_ioat_initialized = false; @@ -103,9 +74,6 @@ struct ioat_io_channel { struct spdk_ioat_chan *ioat_ch; struct ioat_device *ioat_dev; struct spdk_poller *poller; - TAILQ_HEAD(, ioat_accel_op) op_pool; - TAILQ_HEAD(, ioat_accel_op) sw_batch; /* for operations not hw accelerated */ - bool hw_batch; /* for operations that are hw accelerated */ }; static int @@ -149,17 +117,13 @@ ioat_free_device(struct ioat_device *dev) pthread_mutex_unlock(&g_ioat_mutex); } -struct ioat_task { - spdk_accel_completion_cb cb; -}; - static int accel_engine_ioat_init(void); static void accel_engine_ioat_exit(void *ctx); static size_t accel_engine_ioat_get_ctx_size(void) { - return sizeof(struct ioat_task) + sizeof(struct spdk_accel_task); + return 0; } SPDK_ACCEL_MODULE_REGISTER(accel_engine_ioat_init, accel_engine_ioat_exit, @@ -168,43 +132,9 @@ SPDK_ACCEL_MODULE_REGISTER(accel_engine_ioat_init, 
accel_engine_ioat_exit, static void ioat_done(void *cb_arg) { - struct spdk_accel_task *accel_task; - struct ioat_task *ioat_task = cb_arg; + struct spdk_accel_task *accel_task = cb_arg; - accel_task = (struct spdk_accel_task *) - ((uintptr_t)ioat_task - - offsetof(struct spdk_accel_task, offload_ctx)); - - ioat_task->cb(accel_task, 0); -} - -static int -ioat_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct ioat_task *ioat_task = (struct ioat_task *)cb_arg; - struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); - - assert(ioat_ch->ioat_ch != NULL); - - ioat_task->cb = cb_fn; - - return spdk_ioat_submit_copy(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, src, nbytes); -} - -static int -ioat_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill, - uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct ioat_task *ioat_task = (struct ioat_task *)cb_arg; - struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); - uint64_t fill64 = 0x0101010101010101ULL * fill; - - assert(ioat_ch->ioat_ch != NULL); - - ioat_task->cb = cb_fn; - - return spdk_ioat_submit_fill(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, fill64, nbytes); + spdk_accel_task_complete(accel_task, 0); } static int @@ -218,266 +148,62 @@ ioat_poll(void *arg) static struct spdk_io_channel *ioat_get_io_channel(void); -/* - * The IOAT engine only supports these capabilities as hardware - * accelerated. The accel fw will handle unsupported functions - * by calling the software implementations of the functions. - */ static uint64_t ioat_get_capabilities(void) { - return ACCEL_COPY | ACCEL_FILL | ACCEL_BATCH; + return ACCEL_COPY | ACCEL_FILL; } -/* The IOAT batch functions exposed by the accel fw do not match up 1:1 - * with the functions in the IOAT library. The IOAT library directly only - * supports construction of accelerated functions via the IOAT native - * interface. 
The accel_fw batch capabilities are implemented here in the - * plug-in and rely on either the IOAT library for accelerated commands - * or software functions for non-accelerated. - */ static uint32_t -ioat_batch_get_max(void) -{ - return g_batch_size; -} - -static struct spdk_accel_batch * -ioat_batch_create(struct spdk_io_channel *ch) +ioat_batch_get_max(struct spdk_io_channel *ch) { struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); - if (!TAILQ_EMPTY(&ioat_ch->sw_batch) || (ioat_ch->hw_batch == true)) { - SPDK_ERRLOG("IOAT accel engine only supports one batch at a time.\n"); - return NULL; - } - - return (struct spdk_accel_batch *)&ioat_ch->hw_batch; -} - -static struct ioat_accel_op * -_prep_op(struct ioat_io_channel *ioat_ch, struct spdk_accel_batch *batch, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct ioat_accel_op *op; - - if ((struct spdk_accel_batch *)&ioat_ch->hw_batch != batch) { - SPDK_ERRLOG("Invalid batch\n"); - return NULL; - } - - if (!TAILQ_EMPTY(&ioat_ch->op_pool)) { - op = TAILQ_FIRST(&ioat_ch->op_pool); - TAILQ_REMOVE(&ioat_ch->op_pool, op, link); - } else { - SPDK_ERRLOG("Ran out of operations for batch\n"); - return NULL; - } - - op->cb_arg = cb_arg; - op->cb_fn = cb_fn; - op->ioat_ch = ioat_ch; - - return op; + return spdk_ioat_get_max_descriptors(ioat_ch->ioat_dev->ioat); } static int -ioat_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) +ioat_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *accel_task) { struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); - struct ioat_task *ioat_task = (struct ioat_task *)cb_arg; + struct spdk_accel_task *tmp; + int rc = 0; - ioat_task->cb = cb_fn; - ioat_ch->hw_batch = true; - - /* Call the IOAT library prep function. 
*/ - return spdk_ioat_build_copy(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, src, nbytes); -} - -static int -ioat_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst, - uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); - struct ioat_task *ioat_task = (struct ioat_task *)cb_arg; - uint64_t fill_pattern; - - ioat_task->cb = cb_fn; - ioat_ch->hw_batch = true; - memset(&fill_pattern, fill, sizeof(uint64_t)); - - /* Call the IOAT library prep function. */ - return spdk_ioat_build_fill(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, fill_pattern, nbytes); -} - -static int -ioat_batch_prep_dualcast(struct spdk_io_channel *ch, - struct spdk_accel_batch *batch, void *dst1, void *dst2, - void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct ioat_accel_op *op; - struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); - - if ((uintptr_t)dst1 & (ALIGN_4K - 1) || (uintptr_t)dst2 & (ALIGN_4K - 1)) { - SPDK_ERRLOG("Dualcast requires 4K alignment on dst addresses\n"); - return -EINVAL; - } - - op = _prep_op(ioat_ch, batch, cb_fn, cb_arg); - if (op == NULL) { - return -EINVAL; - } - - /* Command specific. */ - op->src = src; - op->dst = dst1; - op->dst2 = dst2; - op->nbytes = nbytes; - op->op_code = IOAT_ACCEL_OPCODE_DUALCAST; - TAILQ_INSERT_TAIL(&ioat_ch->sw_batch, op, link); - - return 0; -} - -static int -ioat_batch_prep_compare(struct spdk_io_channel *ch, - struct spdk_accel_batch *batch, void *src1, - void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct ioat_accel_op *op; - struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); - - op = _prep_op(ioat_ch, batch, cb_fn, cb_arg); - if (op == NULL) { - return -EINVAL; - } - - /* Command specific. 
*/ - op->src = src1; - op->src2 = src2; - op->nbytes = nbytes; - op->op_code = IOAT_ACCEL_OPCODE_COMPARE; - TAILQ_INSERT_TAIL(&ioat_ch->sw_batch, op, link); - - return 0; -} - -static int -ioat_batch_prep_crc32c(struct spdk_io_channel *ch, - struct spdk_accel_batch *batch, uint32_t *dst, void *src, - uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct ioat_accel_op *op; - struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); - - op = _prep_op(ioat_ch, batch, cb_fn, cb_arg); - if (op == NULL) { - return -EINVAL; - } - - /* Command specific. */ - op->dst = (void *)dst; - op->src = src; - op->seed = seed; - op->nbytes = nbytes; - op->op_code = IOAT_ACCEL_OPCODE_CRC32C; - TAILQ_INSERT_TAIL(&ioat_ch->sw_batch, op, link); - - return 0; -} - -static int -ioat_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *batch) -{ - struct ioat_accel_op *op; - struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); - - if ((struct spdk_accel_batch *)&ioat_ch->hw_batch != batch) { - SPDK_ERRLOG("Invalid batch\n"); - return -EINVAL; - } - - /* Flush the batched HW items, there's no way to cancel these without resetting. */ - spdk_ioat_flush(ioat_ch->ioat_ch); - ioat_ch->hw_batch = false; - - /* Return batched software items to the pool. */ - while ((op = TAILQ_FIRST(&ioat_ch->sw_batch))) { - TAILQ_REMOVE(&ioat_ch->sw_batch, op, link); - TAILQ_INSERT_TAIL(&ioat_ch->op_pool, op, link); - } - - return 0; -} - -static int -ioat_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, - spdk_accel_completion_cb cb_fn, void *cb_arg) -{ - struct ioat_accel_op *op; - struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch); - struct spdk_accel_task *accel_task; - int batch_status = 0, cmd_status = 0; - - if ((struct spdk_accel_batch *)&ioat_ch->hw_batch != batch) { - SPDK_ERRLOG("Invalid batch\n"); - return -EINVAL; - } - - /* Flush the batched HW items first. 
*/ - spdk_ioat_flush(ioat_ch->ioat_ch); - ioat_ch->hw_batch = false; - - /* Complete the batched software items. */ - while ((op = TAILQ_FIRST(&ioat_ch->sw_batch))) { - TAILQ_REMOVE(&ioat_ch->sw_batch, op, link); - accel_task = (struct spdk_accel_task *)((uintptr_t)op->cb_arg - - offsetof(struct spdk_accel_task, offload_ctx)); - - switch (op->op_code) { - case IOAT_ACCEL_OPCODE_DUALCAST: - memcpy(op->dst, op->src, op->nbytes); - memcpy(op->dst2, op->src, op->nbytes); + do { + switch (accel_task->op_code) { + case ACCEL_OPCODE_MEMFILL: + rc = spdk_ioat_build_fill(ioat_ch->ioat_ch, accel_task, ioat_done, + accel_task->dst, accel_task->fill_pattern, accel_task->nbytes); break; - case IOAT_ACCEL_OPCODE_COMPARE: - cmd_status = memcmp(op->src, op->src2, op->nbytes); - break; - case IOAT_ACCEL_OPCODE_CRC32C: - *(uint32_t *)op->dst = spdk_crc32c_update(op->src, op->nbytes, ~op->seed); + case ACCEL_OPCODE_MEMMOVE: + rc = spdk_ioat_build_copy(ioat_ch->ioat_ch, accel_task, ioat_done, + accel_task->dst, accel_task->src, accel_task->nbytes); break; default: assert(false); break; } - batch_status |= cmd_status; - op->cb_fn(accel_task, cmd_status); - TAILQ_INSERT_TAIL(&ioat_ch->op_pool, op, link); - } + tmp = TAILQ_NEXT(accel_task, link); - /* Now complete the batch request itself. */ - accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg - - offsetof(struct spdk_accel_task, offload_ctx)); - cb_fn(accel_task, batch_status); + /* Report any build errors via the callback now. 
*/ + if (rc) { + spdk_accel_task_complete(accel_task, rc); + } + + accel_task = tmp; + } while (accel_task); + + spdk_ioat_flush(ioat_ch->ioat_ch); return 0; } static struct spdk_accel_engine ioat_accel_engine = { .get_capabilities = ioat_get_capabilities, - .copy = ioat_submit_copy, - .fill = ioat_submit_fill, - .batch_get_max = ioat_batch_get_max, - .batch_create = ioat_batch_create, - .batch_cancel = ioat_batch_cancel, - .batch_prep_copy = ioat_batch_prep_copy, - .batch_prep_dualcast = ioat_batch_prep_dualcast, - .batch_prep_compare = ioat_batch_prep_compare, - .batch_prep_fill = ioat_batch_prep_fill, - .batch_prep_crc32c = ioat_batch_prep_crc32c, - .batch_submit = ioat_batch_submit, .get_io_channel = ioat_get_io_channel, + .batch_get_max = ioat_batch_get_max, + .submit_tasks = ioat_submit_tasks, }; static int @@ -485,35 +211,16 @@ ioat_create_cb(void *io_device, void *ctx_buf) { struct ioat_io_channel *ch = ctx_buf; struct ioat_device *ioat_dev; - struct ioat_accel_op *op; - int i; ioat_dev = ioat_allocate_device(); if (ioat_dev == NULL) { return -1; } - TAILQ_INIT(&ch->sw_batch); - ch->hw_batch = false; - TAILQ_INIT(&ch->op_pool); - - g_batch_size = spdk_ioat_get_max_descriptors(ioat_dev->ioat); - for (i = 0 ; i < g_batch_size ; i++) { - op = calloc(1, sizeof(struct ioat_accel_op)); - if (op == NULL) { - SPDK_ERRLOG("Failed to allocate operation for batch.\n"); - while ((op = TAILQ_FIRST(&ch->op_pool))) { - TAILQ_REMOVE(&ch->op_pool, op, link); - free(op); - } - return -ENOMEM; - } - TAILQ_INSERT_TAIL(&ch->op_pool, op, link); - } - ch->ioat_dev = ioat_dev; ch->ioat_ch = ioat_dev->ioat; ch->poller = SPDK_POLLER_REGISTER(ioat_poll, ch->ioat_ch, 0); + return 0; } @@ -521,12 +228,6 @@ static void ioat_destroy_cb(void *io_device, void *ctx_buf) { struct ioat_io_channel *ch = ctx_buf; - struct ioat_accel_op *op; - - while ((op = TAILQ_FIRST(&ch->op_pool))) { - TAILQ_REMOVE(&ch->op_pool, op, link); - free(op); - } ioat_free_device(ch->ioat_dev); 
spdk_poller_unregister(&ch->poller);