diff --git a/include/spdk/accel_engine.h b/include/spdk/accel_engine.h
index d682ad1c4..e8056d34e 100644
--- a/include/spdk/accel_engine.h
+++ b/include/spdk/accel_engine.h
@@ -49,9 +49,8 @@ enum accel_capability {
 	ACCEL_FILL		= 1 << 1,
 	ACCEL_DUALCAST		= 1 << 2,
 	ACCEL_COMPARE		= 1 << 3,
-	ACCEL_BATCH		= 1 << 4,
-	ACCEL_CRC32C		= 1 << 5,
-	ACCEL_DIF		= 1 << 6,
+	ACCEL_CRC32C		= 1 << 4,
+	ACCEL_DIF		= 1 << 5,
 };
 
 /**
@@ -69,10 +68,6 @@ typedef void (*spdk_accel_completion_cb)(void *ref, int status);
  */
 typedef void (*spdk_accel_fini_cb)(void *cb_arg);
 
-struct spdk_io_channel;
-
-struct spdk_accel_batch;
-
 /**
  * Initialize the acceleration engine.
  *
@@ -103,11 +98,11 @@ void spdk_accel_engine_module_finish(void);
 struct spdk_io_channel *spdk_accel_engine_get_io_channel(void);
 
 /**
- * Retrieve accel engine capabilities.
+ * Retrieve accel engine HW acceleration capabilities.
  *
  * \param ch I/O channel associated with this call.
  *
- * \return bitmap of capabilities defined by enum accel_capability.
+ * \return bitmap of HW acceleration capabilities defined by enum accel_capability.
  */
 uint64_t spdk_accel_get_capabilities(struct spdk_io_channel *ch);
 
diff --git a/include/spdk_internal/accel_engine.h b/include/spdk_internal/accel_engine.h
index 9fa77f30b..1433ce0a8 100644
--- a/include/spdk_internal/accel_engine.h
+++ b/include/spdk_internal/accel_engine.h
@@ -39,44 +39,67 @@
 #include "spdk/accel_engine.h"
 #include "spdk/queue.h"
 
-struct spdk_accel_task {
-	spdk_accel_completion_cb	cb;
+struct spdk_accel_task;
+
+void spdk_accel_task_complete(struct spdk_accel_task *task, int status);
+
+struct accel_io_channel {
+	struct spdk_accel_engine	*engine;
+	struct spdk_io_channel		*engine_ch;
+	void				*task_pool_base;
+	TAILQ_HEAD(, spdk_accel_task)	task_pool;
+	void				*batch_pool_base;
+	TAILQ_HEAD(, spdk_accel_batch)	batch_pool;
+	TAILQ_HEAD(, spdk_accel_batch)	batches;
+};
+
+struct spdk_accel_batch {
+	/* Lists of commands in the batch. */
+	TAILQ_HEAD(, spdk_accel_task)	hw_tasks;
+	TAILQ_HEAD(, spdk_accel_task)	sw_tasks;
+	/* Specific to the batch task itself. */
+	int				status;
+	uint32_t			count;
+	spdk_accel_completion_cb	cb_fn;
 	void				*cb_arg;
 	struct accel_io_channel		*accel_ch;
+	TAILQ_ENTRY(spdk_accel_batch)	link;
+};
+
+enum accel_opcode {
+	ACCEL_OPCODE_MEMMOVE	= 0,
+	ACCEL_OPCODE_MEMFILL	= 1,
+	ACCEL_OPCODE_COMPARE	= 2,
+	ACCEL_OPCODE_BATCH	= 3,
+	ACCEL_OPCODE_CRC32C	= 4,
+	ACCEL_OPCODE_DUALCAST	= 5,
+};
+
+struct spdk_accel_task {
+	struct accel_io_channel		*accel_ch;
+	struct spdk_accel_batch		*batch;
+	spdk_accel_completion_cb	cb_fn;
+	void				*cb_arg;
+	void				*src;
+	union {
+		void			*dst;
+		void			*src2;
+	};
+	void				*dst2;
+	uint32_t			seed;
+	uint64_t			fill_pattern;
+	enum accel_opcode		op_code;
+	uint64_t			nbytes;
 	TAILQ_ENTRY(spdk_accel_task)	link;
-	uint8_t				offload_ctx[0];
+	uint8_t				offload_ctx[0]; /* Not currently used. */
 };
 
 struct spdk_accel_engine {
+	uint64_t capabilities;
 	uint64_t (*get_capabilities)(void);
-	int (*copy)(struct spdk_io_channel *ch, void *dst, void *src,
-		    uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
-	int (*dualcast)(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src,
-			uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
-	uint32_t (*batch_get_max)(void);
-	struct spdk_accel_batch *(*batch_create)(struct spdk_io_channel *ch);
-	int (*batch_prep_copy)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-			       void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
-	int (*batch_prep_dualcast)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-				   void *dst1, void *dst2, void *src, uint64_t nbytes,
-				   spdk_accel_completion_cb cb_fn, void *cb_arg);
-	int (*batch_prep_compare)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-				  void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
-	int (*batch_prep_fill)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-			       void *dst, uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
-	int (*batch_prep_crc32c)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-				 uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
-				 spdk_accel_completion_cb cb_fn, void *cb_arg);
-	int (*batch_submit)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-			    spdk_accel_completion_cb cb_fn, void *cb_arg);
-	int (*batch_cancel)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch);
-	int (*compare)(struct spdk_io_channel *ch, void *src1, void *src2,
-		       uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
-	int (*fill)(struct spdk_io_channel *ch, void *dst, uint8_t fill,
-		    uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
-	int (*crc32c)(struct spdk_io_channel *ch, uint32_t *dst, void *src,
-		      uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
 	struct spdk_io_channel *(*get_io_channel)(void);
+	uint32_t (*batch_get_max)(struct spdk_io_channel *ch);
+	int (*submit_tasks)(struct spdk_io_channel *ch, struct spdk_accel_task *accel_task);
 };
 
 struct spdk_accel_module_if {
diff --git a/lib/accel/accel_engine.c b/lib/accel/accel_engine.c
index ce260ccbf..b65528984 100644
--- a/lib/accel/accel_engine.c
+++ b/lib/accel/accel_engine.c
@@ -40,16 +40,19 @@
 #include "spdk/thread.h"
 #include "spdk/json.h"
 #include "spdk/crc32.h"
+#include "spdk/util.h"
 
 /* Accelerator Engine Framework: The following provides a top level
  * generic API for the accelerator functions defined here. Modules,
- * such as the one in /module/accel/ioat, supply the implemention of
+ * such as the one in /module/accel/ioat, supply the implementation
  * with the exception of the pure software implemention contained
  * later in this file.
  */
 
-#define ALIGN_4K		0x1000
-#define MAX_TASKS_PER_CHANNEL	0x400
+#define ALIGN_4K			0x1000
+#define MAX_TASKS_PER_CHANNEL		0x800
+#define MAX_BATCH_SIZE			0x80
+#define MAX_NUM_BATCHES_PER_CHANNEL	(MAX_TASKS_PER_CHANNEL / MAX_BATCH_SIZE)
 
 /* Largest context size for all accel modules */
 static size_t g_max_accel_module_size = 0;
@@ -64,27 +67,11 @@ static void *g_fini_cb_arg = NULL;
 static TAILQ_HEAD(, spdk_accel_module_if) spdk_accel_module_list =
 	TAILQ_HEAD_INITIALIZER(spdk_accel_module_list);
 
-struct accel_io_channel {
-	struct spdk_accel_engine	*engine;
-	struct spdk_io_channel		*ch;
-	void				*task_pool_base;
-	TAILQ_HEAD(, spdk_accel_task)	task_pool;
-};
-
-/* Forward declarations of software implementations used when an
- * engine has not implemented the capability.
- */
-static int sw_accel_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src,
-				    uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
-static int sw_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src,
-				uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
-static int sw_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2,
-				   uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
-static int sw_accel_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill,
-				uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
-static int sw_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src,
-				  uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn,
-				  void *cb_arg);
+static void _sw_accel_dualcast(void *dst1, void *dst2, void *src, uint64_t nbytes);
+static void _sw_accel_copy(void *dst, void *src, uint64_t nbytes);
+static int _sw_accel_compare(void *src1, void *src2, uint64_t nbytes);
+static void _sw_accel_fill(void *dst, uint8_t fill, uint64_t nbytes);
+static void _sw_accel_crc32c(uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes);
 
 /* Registration of hw modules (currently supports only 1 at a time) */
 void
@@ -111,38 +98,82 @@ accel_sw_unregister(void)
 	g_sw_accel_engine = NULL;
 }
 
-/* Common completion routine, called only by the accel framework */
-static void
-_accel_engine_done(void *ref, int status)
+/* Used to determine whether a command is sent to an engine/module or done here
+ * via SW implementation.
+ */
+inline static bool
+_is_supported(struct spdk_accel_engine *engine, enum accel_capability operation)
 {
-	struct spdk_accel_task *accel_task = (struct spdk_accel_task *)ref;
-
-	accel_task->cb(accel_task->cb_arg, status);
-	TAILQ_INSERT_TAIL(&accel_task->accel_ch->task_pool, accel_task, link);
+	return ((engine->capabilities & operation) == operation);
 }
 
+/* Complete a task: invoke its callback, update its batch (if any), return it to the pool. */
+void
+spdk_accel_task_complete(struct spdk_accel_task *accel_task, int status)
+{
+	struct accel_io_channel *accel_ch = accel_task->accel_ch;
+	struct spdk_accel_batch *batch = accel_task->batch;
+
+	accel_task->cb_fn(accel_task->cb_arg, status);
+
+	/* Part of a batch: record this result before checking for batch completion. */
+	if (batch) {
+		batch->status |= status;
+		assert(batch->count > 0);
+		batch->count--;
+		if (batch->count == 0) {
+			SPDK_DEBUGLOG(accel, "Batch %p count %d\n", batch, batch->count);
+			if (batch->cb_fn) {
+				batch->cb_fn(batch->cb_arg, batch->status);
+			}
+			TAILQ_REMOVE(&accel_ch->batches, batch, link);
+			TAILQ_INSERT_TAIL(&accel_ch->batch_pool, batch, link);
+		}
+	}
+	TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link);
+}
+
+/* Accel framework public API for discovering current engine capabilities. */
 uint64_t
 spdk_accel_get_capabilities(struct spdk_io_channel *ch)
 {
 	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
 
-	/* All engines are required to implement this API. */
-	return accel_ch->engine->get_capabilities();
+	return accel_ch->engine->capabilities;
+}
+
+inline static bool
+_is_batch_valid(struct spdk_accel_batch *batch, struct accel_io_channel *accel_ch)
+{
+	return (batch->accel_ch == accel_ch);
 }
 
 inline static struct spdk_accel_task *
-_get_task(struct accel_io_channel *accel_ch, spdk_accel_completion_cb cb_fn, void *cb_arg)
+_get_task(struct accel_io_channel *accel_ch, struct spdk_accel_batch *batch,
+	  spdk_accel_completion_cb cb_fn, void *cb_arg)
 {
-	struct spdk_accel_task *accel_task = TAILQ_FIRST(&accel_ch->task_pool);
+	struct spdk_accel_task *accel_task;
 
+	if (batch && _is_batch_valid(batch, accel_ch) == false) {
+		SPDK_ERRLOG("Attempt to access an invalid batch.\n");
+		return NULL;
+	}
+
+	accel_task = TAILQ_FIRST(&accel_ch->task_pool);
 	if (accel_task == NULL) {
 		return NULL;
 	}
 	TAILQ_REMOVE(&accel_ch->task_pool, accel_task, link);
+	accel_task->link.tqe_next = NULL;
+	accel_task->link.tqe_prev = NULL;
 
-	accel_task->cb = cb_fn;
+	accel_task->cb_fn = cb_fn;
 	accel_task->cb_arg = cb_arg;
 	accel_task->accel_ch = accel_ch;
+	accel_task->batch = batch;
+	if (batch) {
+		batch->count++; /* Counted back down in spdk_accel_task_complete(). */
+	}
 
 	return accel_task;
 }
@@ -155,18 +186,22 @@ spdk_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_
 	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
 	struct spdk_accel_task *accel_task;
 
-	accel_task = _get_task(accel_ch, cb_fn, cb_arg);
+	accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg);
 	if (accel_task == NULL) {
 		return -ENOMEM;
 	}
 
-	/* If the engine does not support it, fallback to the sw implementation. */
-	if (accel_ch->engine->copy) {
-		return accel_ch->engine->copy(accel_ch->ch, dst, src, nbytes,
-					      _accel_engine_done, accel_task->offload_ctx);
+	accel_task->dst = dst;
+	accel_task->src = src;
+	accel_task->op_code = ACCEL_OPCODE_MEMMOVE;
+	accel_task->nbytes = nbytes;
+
+	if (_is_supported(accel_ch->engine, ACCEL_COPY)) {
+		return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
 	} else {
-		return sw_accel_submit_copy(accel_ch->ch, dst, src, nbytes,
-					    _accel_engine_done, accel_task->offload_ctx);
+		_sw_accel_copy(dst, src, nbytes);
+		spdk_accel_task_complete(accel_task, 0);
+		return 0;
 	}
 }
 
@@ -183,176 +218,26 @@ spdk_accel_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, v
 		return -EINVAL;
 	}
 
-	accel_task = _get_task(accel_ch, cb_fn, cb_arg);
+	accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg);
 	if (accel_task == NULL) {
 		return -ENOMEM;
 	}
 
-	/* If the engine does not support it, fallback to the sw implementation. */
-	if (accel_ch->engine->dualcast) {
-		return accel_ch->engine->dualcast(accel_ch->ch, dst1, dst2, src, nbytes,
-						  _accel_engine_done, accel_task->offload_ctx);
+	accel_task->src = src;
+	accel_task->dst = dst1;
+	accel_task->dst2 = dst2;
+	accel_task->nbytes = nbytes;
+	accel_task->op_code = ACCEL_OPCODE_DUALCAST;
+
+	if (_is_supported(accel_ch->engine, ACCEL_DUALCAST)) {
+		return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
 	} else {
-		return sw_accel_submit_dualcast(accel_ch->ch, dst1, dst2, src, nbytes,
-						_accel_engine_done, accel_task->offload_ctx);
+		_sw_accel_dualcast(dst1, dst2, src, nbytes);
+		spdk_accel_task_complete(accel_task, 0);
+		return 0;
 	}
 }
 
-/* Accel framework public API for batch_create function. All engines are
- * required to implement this API.
- */
-struct spdk_accel_batch *
-spdk_accel_batch_create(struct spdk_io_channel *ch)
-{
-	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
-
-	return accel_ch->engine->batch_create(accel_ch->ch);
-}
-
-/* Accel framework public API for batch_submit function. All engines are
- * required to implement this API.
- */
-int
-spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-			spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
-	struct spdk_accel_task *accel_task;
-
-	accel_task = _get_task(accel_ch, cb_fn, cb_arg);
-	if (accel_task == NULL) {
-		return -ENOMEM;
-	}
-
-	return accel_ch->engine->batch_submit(accel_ch->ch, batch, _accel_engine_done,
-					      accel_task->offload_ctx);
-}
-
-/* Accel framework public API for getting max batch. All engines are
- * required to implement this API.
- */
-uint32_t
-spdk_accel_batch_get_max(struct spdk_io_channel *ch)
-{
-	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
-
-	return accel_ch->engine->batch_get_max();
-}
-
-/* Accel framework public API for for when an app is unable to complete a batch sequence,
- * it cancels with this API.
- */
-int
-spdk_accel_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *batch)
-{
-	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
-
-	return accel_ch->engine->batch_cancel(accel_ch->ch, batch);
-}
-
-/* Accel framework public API for batch prep_copy function. All engines are
- * required to implement this API.
- */
-int
-spdk_accel_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst,
-			   void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
-	struct spdk_accel_task *accel_task;
-
-	accel_task = _get_task(accel_ch, cb_fn, cb_arg);
-	if (accel_task == NULL) {
-		return -ENOMEM;
-	}
-
-	return accel_ch->engine->batch_prep_copy(accel_ch->ch, batch, dst, src, nbytes,
-			_accel_engine_done, accel_task->offload_ctx);
-}
-
-/* Accel framework public API for batch prep_dualcast function.  All engines are
- * required to implement this API.
- */
-int
-spdk_accel_batch_prep_dualcast(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-			       void *dst1, void *dst2, void *src, uint64_t nbytes,
-			       spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
-	struct spdk_accel_task *accel_task;
-
-	if ((uintptr_t)dst1 & (ALIGN_4K - 1) || (uintptr_t)dst2 & (ALIGN_4K - 1)) {
-		SPDK_ERRLOG("Dualcast requires 4K alignment on dst addresses\n");
-		return -EINVAL;
-	}
-
-	accel_task = _get_task(accel_ch, cb_fn, cb_arg);
-	if (accel_task == NULL) {
-		return -ENOMEM;
-	}
-
-	return accel_ch->engine->batch_prep_dualcast(accel_ch->ch, batch, dst1, dst2, src,
-			nbytes, _accel_engine_done, accel_task->offload_ctx);
-}
-
-/* Accel framework public API for batch prep_compare function.  All engines are
- * required to implement this API.
- */
-int
-spdk_accel_batch_prep_compare(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-			      void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn,
-			      void *cb_arg)
-{
-	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
-	struct spdk_accel_task *accel_task;
-
-	accel_task = _get_task(accel_ch, cb_fn, cb_arg);
-	if (accel_task == NULL) {
-		return -ENOMEM;
-	}
-
-	return accel_ch->engine->batch_prep_compare(accel_ch->ch, batch, src1, src2, nbytes,
-			_accel_engine_done, accel_task->offload_ctx);
-}
-
-/* Accel framework public API for batch prep_fill function.  All engines are
- * required to implement this API.
- */
-int
-spdk_accel_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst,
-			   uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
-	struct spdk_accel_task *accel_task;
-
-	accel_task = _get_task(accel_ch, cb_fn, cb_arg);
-	if (accel_task == NULL) {
-		return -ENOMEM;
-	}
-
-	return accel_ch->engine->batch_prep_fill(accel_ch->ch, batch, dst, fill, nbytes,
-			_accel_engine_done, accel_task->offload_ctx);
-}
-
-/* Accel framework public API for batch prep_crc32c function.  All engines are
- * required to implement this API.
- */
-int
-spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-			     uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
-			     spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
-	struct spdk_accel_task *accel_task;
-
-	accel_task = _get_task(accel_ch, cb_fn, cb_arg);
-	if (accel_task == NULL) {
-		return -ENOMEM;
-	}
-
-	return accel_ch->engine->batch_prep_crc32c(accel_ch->ch, batch, dst, src, seed, nbytes,
-			_accel_engine_done, accel_task->offload_ctx);
-}
-
 /* Accel framework public API for compare function */
 int
 spdk_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2, uint64_t nbytes,
@@ -360,19 +245,24 @@ spdk_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2, ui
 {
 	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
 	struct spdk_accel_task *accel_task;
+	int rc;
 
-	accel_task = _get_task(accel_ch, cb_fn, cb_arg);
+	accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg);
 	if (accel_task == NULL) {
 		return -ENOMEM;
 	}
 
-	/* If the engine does not support it, fallback to the sw implementation. */
-	if (accel_ch->engine->compare) {
-		return accel_ch->engine->compare(accel_ch->ch, src1, src2, nbytes,
-						 _accel_engine_done, accel_task->offload_ctx);
+	accel_task->src = src1;
+	accel_task->src2 = src2;
+	accel_task->nbytes = nbytes;
+	accel_task->op_code = ACCEL_OPCODE_COMPARE;
+
+	if (_is_supported(accel_ch->engine, ACCEL_COMPARE)) {
+		return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
 	} else {
-		return sw_accel_submit_compare(accel_ch->ch, src1, src2, nbytes,
-					       _accel_engine_done, accel_task->offload_ctx);
+		rc = _sw_accel_compare(src1, src2, nbytes);
+		spdk_accel_task_complete(accel_task, rc);
+		return 0;
 	}
 }
 
@@ -384,18 +274,22 @@ spdk_accel_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill, uint
 	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
 	struct spdk_accel_task *accel_task;
 
-	accel_task = _get_task(accel_ch, cb_fn, cb_arg);
+	accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg);
 	if (accel_task == NULL) {
 		return -ENOMEM;
 	}
 
-	/* If the engine does not support it, fallback to the sw implementation. */
-	if (accel_ch->engine->fill) {
-		return accel_ch->engine->fill(accel_ch->ch, dst, fill, nbytes,
-					      _accel_engine_done, accel_task->offload_ctx);
+	accel_task->dst = dst;
+	accel_task->fill_pattern = fill;
+	accel_task->nbytes = nbytes;
+	accel_task->op_code = ACCEL_OPCODE_MEMFILL;
+
+	if (_is_supported(accel_ch->engine, ACCEL_FILL)) {
+		return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
 	} else {
-		return sw_accel_submit_fill(accel_ch->ch, dst, fill, nbytes,
-					    _accel_engine_done, accel_task->offload_ctx);
+		_sw_accel_fill(dst, fill, nbytes);
+		spdk_accel_task_complete(accel_task, 0);
+		return 0;
 	}
 }
 
@@ -407,21 +301,304 @@ spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, u
 	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
 	struct spdk_accel_task *accel_task;
 
-	accel_task = _get_task(accel_ch, cb_fn, cb_arg);
+	accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg);
 	if (accel_task == NULL) {
 		return -ENOMEM;
 	}
 
-	/* If the engine does not support it, fallback to the sw implementation. */
-	if (accel_ch->engine->crc32c) {
-		return accel_ch->engine->crc32c(accel_ch->ch, dst, src,	seed, nbytes,
-						_accel_engine_done, accel_task->offload_ctx);
+	accel_task->dst = (void *)dst;
+	accel_task->src = src;
+	accel_task->seed = seed;
+	accel_task->nbytes = nbytes;
+	accel_task->op_code = ACCEL_OPCODE_CRC32C;
+
+	if (_is_supported(accel_ch->engine, ACCEL_CRC32C)) {
+		return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
 	} else {
-		return sw_accel_submit_crc32c(accel_ch->ch, dst, src, seed, nbytes,
-					      _accel_engine_done, accel_task->offload_ctx);
+		_sw_accel_crc32c(dst, src, seed, nbytes);
+		spdk_accel_task_complete(accel_task, 0);
+		return 0;
 	}
 }
 
+/* Accel framework public API for getting max operations for a batch. */
+uint32_t
+spdk_accel_batch_get_max(struct spdk_io_channel *ch)
+{
+	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
+
+	/* Use the smaller of the currently selected engine or pure SW implementation. */
+	return spdk_min(accel_ch->engine->batch_get_max(accel_ch->engine_ch),
+			MAX_BATCH_SIZE);
+}
+
+int
+spdk_accel_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst,
+			   void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
+{
+	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
+	struct spdk_accel_task *accel_task;
+
+	accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg);
+	if (accel_task == NULL) {
+		return -ENOMEM;
+	}
+
+	accel_task->src = src;
+	accel_task->dst = dst;
+	accel_task->nbytes = nbytes;
+	accel_task->op_code = ACCEL_OPCODE_MEMMOVE;
+
+	if (_is_supported(accel_ch->engine, ACCEL_COPY)) {
+		TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link);
+	} else {
+		TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link);
+	}
+
+	return 0;
+}
+
+int
+spdk_accel_batch_prep_dualcast(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+			       void *dst1, void *dst2, void *src, uint64_t nbytes,
+			       spdk_accel_completion_cb cb_fn, void *cb_arg)
+{
+	struct spdk_accel_task *accel_task;
+	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
+
+	if ((uintptr_t)dst1 & (ALIGN_4K - 1) || (uintptr_t)dst2 & (ALIGN_4K - 1)) {
+		SPDK_ERRLOG("Dualcast requires 4K alignment on dst addresses\n");
+		return -EINVAL;
+	}
+
+	accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg);
+	if (accel_task == NULL) {
+		return -ENOMEM;
+	}
+
+	accel_task->src = src;
+	accel_task->dst = dst1;
+	accel_task->dst2 = dst2;
+	accel_task->nbytes = nbytes;
+	accel_task->op_code = ACCEL_OPCODE_DUALCAST;
+
+	if (_is_supported(accel_ch->engine, ACCEL_DUALCAST)) {
+		TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link);
+	} else {
+		TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link);
+	}
+
+	return 0;
+}
+
+int
+spdk_accel_batch_prep_compare(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+			      void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn,
+			      void *cb_arg)
+{
+	struct spdk_accel_task *accel_task;
+	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
+
+	accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg);
+	if (accel_task == NULL) {
+		return -ENOMEM;
+	}
+
+	accel_task->src = src1;
+	accel_task->src2 = src2;
+	accel_task->nbytes = nbytes;
+	accel_task->op_code = ACCEL_OPCODE_COMPARE;
+
+	if (_is_supported(accel_ch->engine, ACCEL_COMPARE)) {
+		TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link);
+	} else {
+		TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link);
+	}
+
+	return 0;
+}
+
+int
+spdk_accel_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst,
+			   uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
+{
+	struct spdk_accel_task *accel_task;
+	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
+
+	accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg);
+	if (accel_task == NULL) {
+		return -ENOMEM;
+	}
+
+	accel_task->dst = dst;
+	accel_task->fill_pattern = fill;
+	accel_task->nbytes = nbytes;
+	accel_task->op_code = ACCEL_OPCODE_MEMFILL;
+
+	if (_is_supported(accel_ch->engine, ACCEL_FILL)) {
+		TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link);
+	} else {
+		TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link);
+	}
+
+	return 0;
+}
+
+int
+spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+			     uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
+			     spdk_accel_completion_cb cb_fn, void *cb_arg)
+{
+	struct spdk_accel_task *accel_task;
+	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
+
+	accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg);
+	if (accel_task == NULL) {
+		return -ENOMEM;
+	}
+
+	accel_task->dst = dst;
+	accel_task->src = src;
+	accel_task->seed = seed;
+	accel_task->nbytes = nbytes;
+	accel_task->op_code = ACCEL_OPCODE_CRC32C;
+
+	if (_is_supported(accel_ch->engine, ACCEL_CRC32C)) {
+		TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link);
+	} else {
+		TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link);
+	}
+
+	return 0;
+}
+
+/* Accel framework public API for batch_create function. */
+struct spdk_accel_batch *
+spdk_accel_batch_create(struct spdk_io_channel *ch)
+{
+	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
+	struct spdk_accel_batch *batch;
+
+	batch = TAILQ_FIRST(&accel_ch->batch_pool);
+	if (batch == NULL) {
+		/* The application needs to handle this case (no batches available) */
+		return NULL;
+	}
+
+	TAILQ_REMOVE(&accel_ch->batch_pool, batch, link);
+	TAILQ_INIT(&batch->hw_tasks);
+	TAILQ_INIT(&batch->sw_tasks);
+	batch->count = batch->status = 0;
+	batch->accel_ch = accel_ch;
+	TAILQ_INSERT_TAIL(&accel_ch->batches, batch, link);
+	SPDK_DEBUGLOG(accel, "Create batch %p\n", batch);
+
+	return (struct spdk_accel_batch *)batch;
+}
+
+/* Accel framework public API for batch_submit: engine gets the HW tasks, SW tasks run inline. */
+int
+spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
+			spdk_accel_completion_cb cb_fn, void *cb_arg)
+{
+	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
+	struct spdk_accel_task *accel_task, *next_task;
+	int rc = 0;
+
+	if (_is_batch_valid(batch, accel_ch) == false) {
+		SPDK_ERRLOG("Attempt to access an invalid batch.\n");
+		return -EINVAL;
+	}
+
+	batch->cb_fn = cb_fn;
+	batch->cb_arg = cb_arg;
+
+	/* Process any HW commands. */
+	if (!TAILQ_EMPTY(&batch->hw_tasks)) {
+		accel_task = TAILQ_FIRST(&batch->hw_tasks);
+
+		/* Clear the hw_tasks list but leave the tasks linked. */
+		TAILQ_INIT(&batch->hw_tasks);
+
+		/* The submit_tasks function will always return success and use the
+		 * task callbacks to report errors.
+		 */
+		accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
+	}
+
+	/* Process any SW commands. */
+	accel_task = TAILQ_FIRST(&batch->sw_tasks);
+
+	/* Clear the sw_tasks list but leave the tasks linked. */
+	TAILQ_INIT(&batch->sw_tasks);
+
+	while (accel_task) {
+		/* Grab the next task now before it's returned to the pool in the cb_fn. */
+		next_task = TAILQ_NEXT(accel_task, link);
+
+		switch (accel_task->op_code) {
+		case ACCEL_OPCODE_MEMMOVE:
+			_sw_accel_copy(accel_task->dst, accel_task->src, accel_task->nbytes);
+			spdk_accel_task_complete(accel_task, 0);
+			break;
+		case ACCEL_OPCODE_MEMFILL:
+			_sw_accel_fill(accel_task->dst, accel_task->fill_pattern, accel_task->nbytes);
+			spdk_accel_task_complete(accel_task, 0);
+			break;
+		case ACCEL_OPCODE_COMPARE:
+			rc = _sw_accel_compare(accel_task->src, accel_task->src2, accel_task->nbytes);
+			batch->status |= rc; /* Must precede completion: it may recycle the batch. */
+			spdk_accel_task_complete(accel_task, rc);
+			break;
+		case ACCEL_OPCODE_CRC32C:
+			_sw_accel_crc32c(accel_task->dst, accel_task->src, accel_task->seed,
+					 accel_task->nbytes);
+			spdk_accel_task_complete(accel_task, 0);
+			break;
+		case ACCEL_OPCODE_DUALCAST:
+			_sw_accel_dualcast(accel_task->dst, accel_task->dst2, accel_task->src,
+					   accel_task->nbytes);
+			spdk_accel_task_complete(accel_task, 0);
+			break;
+		default:
+			assert(false);
+			break;
+		}
+		accel_task = next_task;
+	}
+
+	/* There are no submission errors possible at this point. Any possible errors will
+	 * happen in the task cb_fn calls and OR'd into the batch->status.
+	 */
+	return 0;
+}
+
+/* Accel framework public API for batch cancel function. Returns only the given batch
+ * and the tasks still queued on it to the channel pools; -EINVAL if it is not this channel's.
+ */
+int
+spdk_accel_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *batch)
+{
+	struct accel_io_channel *accel_ch = spdk_io_channel_get_ctx(ch);
+	struct spdk_accel_task *accel_task;
+
+	if (_is_batch_valid(batch, accel_ch) == false) {
+		SPDK_ERRLOG("Attempt to access an invalid batch.\n");
+		return -EINVAL;
+	}
+	TAILQ_REMOVE(&accel_ch->batches, batch, link);
+	while ((accel_task = TAILQ_FIRST(&batch->hw_tasks))) {
+		TAILQ_REMOVE(&batch->hw_tasks, accel_task, link);
+		TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link);
+	}
+	while ((accel_task = TAILQ_FIRST(&batch->sw_tasks))) {
+		TAILQ_REMOVE(&batch->sw_tasks, accel_task, link);
+		TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link);
+	}
+	TAILQ_INSERT_TAIL(&accel_ch->batch_pool, batch, link);
+	return 0;
+}
+
 /* Helper function when when accel modules register with the framework. */
 void spdk_accel_module_list_add(struct spdk_accel_module_if *accel_module)
 {
@@ -438,6 +615,7 @@ accel_engine_create_cb(void *io_device, void *ctx_buf)
 	struct accel_io_channel	*accel_ch = ctx_buf;
 	struct spdk_accel_task *accel_task;
 	uint8_t *task_mem;
+	struct spdk_accel_batch *batch;
 	int i;
 
 	accel_ch->task_pool_base = calloc(MAX_TASKS_PER_CHANNEL, g_max_accel_module_size);
@@ -453,18 +631,31 @@ accel_engine_create_cb(void *io_device, void *ctx_buf)
 		task_mem += g_max_accel_module_size;
 	}
 
-	if (g_hw_accel_engine != NULL) {
-		accel_ch->ch = g_hw_accel_engine->get_io_channel();
-		if (accel_ch->ch != NULL) {
-			accel_ch->engine = g_hw_accel_engine;
-			return 0;
-		}
+	TAILQ_INIT(&accel_ch->batch_pool);
+	TAILQ_INIT(&accel_ch->batches);
+	accel_ch->batch_pool_base = calloc(MAX_NUM_BATCHES_PER_CHANNEL, sizeof(struct spdk_accel_batch));
+	if (accel_ch->batch_pool_base == NULL) {
+		free(accel_ch->task_pool_base);
+		return -ENOMEM;
 	}
 
-	/* No hw engine enabled, use sw. */
-	accel_ch->ch = g_sw_accel_engine->get_io_channel();
-	assert(accel_ch->ch != NULL);
-	accel_ch->engine = g_sw_accel_engine;
+	batch = (struct spdk_accel_batch *)accel_ch->batch_pool_base;
+	for (i = 0 ; i < MAX_NUM_BATCHES_PER_CHANNEL; i++) {
+		TAILQ_INSERT_TAIL(&accel_ch->batch_pool, batch, link);
+		batch++;
+	}
+
+	if (g_hw_accel_engine != NULL) {
+		accel_ch->engine_ch = g_hw_accel_engine->get_io_channel();
+		accel_ch->engine = g_hw_accel_engine;
+	} else {
+		/* No hw engine enabled, use sw. */
+		accel_ch->engine_ch = g_sw_accel_engine->get_io_channel();
+		accel_ch->engine = g_sw_accel_engine;
+	}
+	assert(accel_ch->engine_ch != NULL);
+	accel_ch->engine->capabilities = accel_ch->engine->get_capabilities();
+
 	return 0;
 }
 
@@ -474,7 +665,8 @@ accel_engine_destroy_cb(void *io_device, void *ctx_buf)
 {
 	struct accel_io_channel	*accel_ch = ctx_buf;
 
-	spdk_put_io_channel(accel_ch->ch);
+	free(accel_ch->batch_pool_base);
+	spdk_put_io_channel(accel_ch->engine_ch);
 	free(accel_ch->task_pool_base);
 }
 
@@ -526,8 +718,8 @@ spdk_accel_write_config_json(struct spdk_json_write_ctx *w)
 	struct spdk_accel_module_if *accel_engine_module;
 
 	/*
-	 * The accel engine has no config, there may be some in
-	 * the modules though.
+	 * The accel fw has no config, there may be some in
+	 * the engines/modules though.
 	 */
 	spdk_json_write_array_begin(w);
 	TAILQ_FOREACH(accel_engine_module, &spdk_accel_module_list, tailq) {
@@ -574,418 +766,67 @@ spdk_accel_engine_finish(spdk_accel_fini_cb cb_fn, void *cb_arg)
 /*
  * The SW Accelerator module is "built in" here (rest of file)
  */
-
-#define SW_ACCEL_BATCH_SIZE 2048
-
-enum sw_accel_opcode {
-	SW_ACCEL_OPCODE_MEMMOVE		= 0,
-	SW_ACCEL_OPCODE_MEMFILL		= 1,
-	SW_ACCEL_OPCODE_COMPARE		= 2,
-	SW_ACCEL_OPCODE_CRC32C		= 3,
-	SW_ACCEL_OPCODE_DUALCAST	= 4,
-};
-
-struct sw_accel_op {
-	struct sw_accel_io_channel	*sw_ch;
-	void				*cb_arg;
-	spdk_accel_completion_cb	cb_fn;
-	void				*src;
-	union {
-		void			*dst;
-		void			*src2;
-	};
-	void				*dst2;
-	uint32_t			seed;
-	uint64_t			fill_pattern;
-	enum sw_accel_opcode		op_code;
-	uint64_t			nbytes;
-	TAILQ_ENTRY(sw_accel_op)	link;
-};
-
-/* The sw accel engine only supports one outstanding batch at a time. */
-struct sw_accel_io_channel {
-	TAILQ_HEAD(, sw_accel_op)	op_pool;
-	TAILQ_HEAD(, sw_accel_op)	batch;
-};
-
 static uint64_t
 sw_accel_get_capabilities(void)
 {
-	return ACCEL_COPY | ACCEL_FILL | ACCEL_CRC32C | ACCEL_COMPARE |
-	       ACCEL_DUALCAST | ACCEL_BATCH;
-}
-
-static uint32_t
-sw_accel_batch_get_max(void)
-{
-	return SW_ACCEL_BATCH_SIZE;
-}
-
-/* The sw engine plug-in does not ahve a public API, it is only callable
- * from the accel fw and thus does not need to have its own struct definition
- * of a batch, it just simply casts the address of the single supported batch
- * as the struct spdk_accel_batch pointer.
- */
-static struct spdk_accel_batch *
-sw_accel_batch_start(struct spdk_io_channel *ch)
-{
-	struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch);
-
-	if (!TAILQ_EMPTY(&sw_ch->batch)) {
-		SPDK_ERRLOG("SW accel engine only supports one batch at a time.\n");
-		return NULL;
-	}
-
-	return (struct spdk_accel_batch *)&sw_ch->batch;
-}
-
-static struct sw_accel_op *
-_prep_op(struct sw_accel_io_channel *sw_ch, struct spdk_accel_batch *batch,
-	 spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct sw_accel_op *op;
-
-	if ((struct spdk_accel_batch *)&sw_ch->batch != batch) {
-		SPDK_ERRLOG("Invalid batch\n");
-		return NULL;
-	}
-
-	if (!TAILQ_EMPTY(&sw_ch->op_pool)) {
-		op = TAILQ_FIRST(&sw_ch->op_pool);
-		TAILQ_REMOVE(&sw_ch->op_pool, op, link);
-	} else {
-		SPDK_ERRLOG("Ran out of operations for batch\n");
-		return NULL;
-	}
-
-	op->cb_arg = cb_arg;
-	op->cb_fn = cb_fn;
-	op->sw_ch = sw_ch;
-
-	return op;
-}
-
-static int
-sw_accel_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-			 void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct sw_accel_op *op;
-	struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch);
-
-	op = _prep_op(sw_ch, batch, cb_fn, cb_arg);
-	if (op == NULL) {
-		return -EINVAL;
-	}
-
-	/* Command specific. */
-	op->src = src;
-	op->dst = dst;
-	op->nbytes = nbytes;
-	op->op_code = SW_ACCEL_OPCODE_MEMMOVE;
-	TAILQ_INSERT_TAIL(&sw_ch->batch, op, link);
-
+	/* No HW acceleration capabilities. */
 	return 0;
 }
 
-static int
-sw_accel_batch_prep_dualcast(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst1,
-			     void *dst2,
-			     void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
+static void
+_sw_accel_dualcast(void *dst1, void *dst2, void *src, uint64_t nbytes)
 {
-	struct sw_accel_op *op;
-	struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch);
-
-	op = _prep_op(sw_ch, batch, cb_fn, cb_arg);
-	if (op == NULL) {
-		return -EINVAL;
-	}
-
-	/* Command specific. */
-	op->src = src;
-	op->dst = dst1;
-	op->dst2 = dst2;
-	op->nbytes = nbytes;
-	op->op_code = SW_ACCEL_OPCODE_DUALCAST;
-	TAILQ_INSERT_TAIL(&sw_ch->batch, op, link);
-
-	return 0;
-}
-
-static int
-sw_accel_batch_prep_compare(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *src1,
-			    void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct sw_accel_op *op;
-	struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch);
-
-	op = _prep_op(sw_ch, batch, cb_fn, cb_arg);
-	if (op == NULL) {
-		return -EINVAL;
-	}
-
-	/* Command specific. */
-	op->src = src1;
-	op->src2 = src2;
-	op->nbytes = nbytes;
-	op->op_code = SW_ACCEL_OPCODE_COMPARE;
-	TAILQ_INSERT_TAIL(&sw_ch->batch, op, link);
-
-	return 0;
-}
-
-static int
-sw_accel_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst,
-			 uint8_t fill,
-			 uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct sw_accel_op *op;
-	struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch);
-
-	op = _prep_op(sw_ch, batch, cb_fn, cb_arg);
-	if (op == NULL) {
-		return -EINVAL;
-	}
-
-	/* Command specific. */
-	op->dst = dst;
-	op->fill_pattern = fill;
-	op->nbytes = nbytes;
-	op->op_code = SW_ACCEL_OPCODE_MEMFILL;
-	TAILQ_INSERT_TAIL(&sw_ch->batch, op, link);
-
-	return 0;
-}
-
-static int
-sw_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-			   uint32_t *dst,
-			   void *src, uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct sw_accel_op *op;
-	struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch);
-
-	op = _prep_op(sw_ch, batch, cb_fn, cb_arg);
-	if (op == NULL) {
-		return -EINVAL;
-	}
-
-	/* Command specific. */
-	op->dst = (void *)dst;
-	op->src = src;
-	op->seed = seed;
-	op->nbytes = nbytes;
-	op->op_code = SW_ACCEL_OPCODE_CRC32C;
-	TAILQ_INSERT_TAIL(&sw_ch->batch, op, link);
-
-	return 0;
-}
-
-
-static int
-sw_accel_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *batch)
-{
-	struct sw_accel_op *op;
-	struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch);
-
-	if ((struct spdk_accel_batch *)&sw_ch->batch != batch) {
-		SPDK_ERRLOG("Invalid batch\n");
-		return -EINVAL;
-	}
-
-	/* Cancel the batch items by moving them back to the op_pool. */
-	while ((op = TAILQ_FIRST(&sw_ch->batch))) {
-		TAILQ_REMOVE(&sw_ch->batch, op, link);
-		TAILQ_INSERT_TAIL(&sw_ch->op_pool, op, link);
-	}
-
-	return 0;
-}
-
-static int
-sw_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-		      spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct sw_accel_op *op;
-	struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(ch);
-	struct spdk_accel_task *accel_task;
-	int batch_status = 0, cmd_status = 0;
-
-	if ((struct spdk_accel_batch *)&sw_ch->batch != batch) {
-		SPDK_ERRLOG("Invalid batch\n");
-		return -EINVAL;
-	}
-
-	/* Complete the batch items. */
-	while ((op = TAILQ_FIRST(&sw_ch->batch))) {
-		TAILQ_REMOVE(&sw_ch->batch, op, link);
-		accel_task = (struct spdk_accel_task *)((uintptr_t)op->cb_arg -
-							offsetof(struct spdk_accel_task, offload_ctx));
-
-		switch (op->op_code) {
-		case SW_ACCEL_OPCODE_MEMMOVE:
-			memcpy(op->dst, op->src, op->nbytes);
-			break;
-		case SW_ACCEL_OPCODE_DUALCAST:
-			memcpy(op->dst, op->src, op->nbytes);
-			memcpy(op->dst2, op->src, op->nbytes);
-			break;
-		case SW_ACCEL_OPCODE_COMPARE:
-			cmd_status = memcmp(op->src, op->src2, op->nbytes);
-			break;
-		case SW_ACCEL_OPCODE_MEMFILL:
-			memset(op->dst, op->fill_pattern, op->nbytes);
-			break;
-		case SW_ACCEL_OPCODE_CRC32C:
-			*(uint32_t *)op->dst = spdk_crc32c_update(op->src, op->nbytes, ~op->seed);
-			break;
-		default:
-			assert(false);
-			break;
-		}
-
-		batch_status |= cmd_status;
-		op->cb_fn(accel_task, cmd_status);
-		TAILQ_INSERT_TAIL(&sw_ch->op_pool, op, link);
-	}
-
-	/* Now complete the batch request itself. */
-	accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg -
-						offsetof(struct spdk_accel_task, offload_ctx));
-	cb_fn(accel_task, batch_status);
-
-	return 0;
-}
-
-static int
-sw_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src,
-		     uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct spdk_accel_task *accel_task;
-
-	memcpy(dst, src, (size_t)nbytes);
-
-	accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg -
-						offsetof(struct spdk_accel_task, offload_ctx));
-	cb_fn(accel_task, 0);
-	return 0;
-}
-
-static int
-sw_accel_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2,
-			 void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct spdk_accel_task *accel_task;
-
 	memcpy(dst1, src, (size_t)nbytes);
 	memcpy(dst2, src, (size_t)nbytes);
+}
 
-	accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg -
-						offsetof(struct spdk_accel_task, offload_ctx));
-	cb_fn(accel_task, 0);
-	return 0;
+static void
+_sw_accel_copy(void *dst, void *src, uint64_t nbytes)
+{
+	memcpy(dst, src, (size_t)nbytes);
 }
 
 static int
-sw_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2,
-			uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
+_sw_accel_compare(void *src1, void *src2, uint64_t nbytes)
 {
-	struct spdk_accel_task *accel_task;
-	int result;
-
-	result = memcmp(src1, src2, (size_t)nbytes);
-
-	accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg -
-						offsetof(struct spdk_accel_task, offload_ctx));
-	cb_fn(accel_task, result);
-
-	return 0;
+	return memcmp(src1, src2, (size_t)nbytes);
 }
 
-static int
-sw_accel_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill,
-		     uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
+static void
+_sw_accel_fill(void *dst, uint8_t fill, uint64_t nbytes)
 {
-	struct spdk_accel_task *accel_task;
-
 	memset(dst, fill, nbytes);
-	accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg -
-						offsetof(struct spdk_accel_task, offload_ctx));
-	cb_fn(accel_task, 0);
-
-	return 0;
 }
 
-static int
-sw_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src,
-		       uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
+static void
+_sw_accel_crc32c(uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes)
 {
-	struct spdk_accel_task *accel_task;
-
 	*dst = spdk_crc32c_update(src, nbytes, ~seed);
-	accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg -
-						offsetof(struct spdk_accel_task, offload_ctx));
-	cb_fn(accel_task, 0);
-
-	return 0;
 }
 
 static struct spdk_io_channel *sw_accel_get_io_channel(void);
 
+static uint32_t
+sw_accel_batch_get_max(struct spdk_io_channel *ch)
+{
+	return MAX_BATCH_SIZE; /* Fixed limit; the ch argument is not consulted. */
+}
+
 static struct spdk_accel_engine sw_accel_engine = {
 	.get_capabilities	= sw_accel_get_capabilities,
-	.copy			= sw_accel_submit_copy,
-	.dualcast		= sw_accel_submit_dualcast,
-	.batch_get_max		= sw_accel_batch_get_max,
-	.batch_create		= sw_accel_batch_start,
-	.batch_cancel		= sw_accel_batch_cancel,
-	.batch_prep_copy	= sw_accel_batch_prep_copy,
-	.batch_prep_dualcast	= sw_accel_batch_prep_dualcast,
-	.batch_prep_compare	= sw_accel_batch_prep_compare,
-	.batch_prep_fill	= sw_accel_batch_prep_fill,
-	.batch_prep_crc32c	= sw_accel_batch_prep_crc32c,
-	.batch_submit		= sw_accel_batch_submit,
-	.compare		= sw_accel_submit_compare,
-	.fill			= sw_accel_submit_fill,
-	.crc32c			= sw_accel_submit_crc32c,
 	.get_io_channel		= sw_accel_get_io_channel,
+	.batch_get_max		= sw_accel_batch_get_max,
 };
 
 static int
 sw_accel_create_cb(void *io_device, void *ctx_buf)
 {
-	struct sw_accel_io_channel *sw_ch = ctx_buf;
-	struct sw_accel_op *op;
-	int i;
-
-	TAILQ_INIT(&sw_ch->batch);
-
-	TAILQ_INIT(&sw_ch->op_pool);
-	for (i = 0 ; i < SW_ACCEL_BATCH_SIZE ; i++) {
-		op = calloc(1, sizeof(struct sw_accel_op));
-		if (op == NULL) {
-			SPDK_ERRLOG("Failed to allocate operation for batch.\n");
-			while ((op = TAILQ_FIRST(&sw_ch->op_pool))) {
-				TAILQ_REMOVE(&sw_ch->op_pool, op, link);
-				free(op);
-			}
-			return -ENOMEM;
-		}
-		TAILQ_INSERT_TAIL(&sw_ch->op_pool, op, link);
-	}
-
 	return 0;
 }
 
 static void
 sw_accel_destroy_cb(void *io_device, void *ctx_buf)
 {
-	struct sw_accel_io_channel *sw_ch = ctx_buf;
-	struct sw_accel_op *op;
-
-	while ((op = TAILQ_FIRST(&sw_ch->op_pool))) {
-		TAILQ_REMOVE(&sw_ch->op_pool, op, link);
-		free(op);
-	}
 }
 
 static struct spdk_io_channel *sw_accel_get_io_channel(void)
@@ -1004,7 +845,7 @@ sw_accel_engine_init(void)
 {
 	accel_sw_register(&sw_accel_engine);
 	spdk_io_device_register(&sw_accel_engine, sw_accel_create_cb, sw_accel_destroy_cb,
-				sizeof(struct sw_accel_io_channel), "sw_accel_engine");
+				0, "sw_accel_engine");
 
 	return 0;
 }
@@ -1018,5 +859,7 @@ sw_accel_engine_fini(void *ctxt)
 	spdk_accel_engine_module_finish();
 }
 
+SPDK_LOG_REGISTER_COMPONENT(accel)
+
 SPDK_ACCEL_MODULE_REGISTER(sw_accel_engine_init, sw_accel_engine_fini,
 			   NULL, sw_accel_engine_get_ctx_size)
diff --git a/lib/accel/spdk_accel.map b/lib/accel/spdk_accel.map
index 20f86c56f..88ebd38b1 100644
--- a/lib/accel/spdk_accel.map
+++ b/lib/accel/spdk_accel.map
@@ -26,7 +26,7 @@
 	# functions needed by modules
 	spdk_accel_hw_engine_register;
 	spdk_accel_module_list_add;
-
+	spdk_accel_task_complete;
 
 	local: *;
 };
diff --git a/mk/spdk.lib_deps.mk b/mk/spdk.lib_deps.mk
index 5ccc3b8a6..f16eec5d7 100644
--- a/mk/spdk.lib_deps.mk
+++ b/mk/spdk.lib_deps.mk
@@ -107,7 +107,7 @@ DEPDIRS-blobfs_bdev += event
 endif
 
 # module/accel
-DEPDIRS-accel_ioat := log ioat thread $(JSON_LIBS) accel util
+DEPDIRS-accel_ioat := log ioat thread $(JSON_LIBS) accel
 DEPDIRS-accel_idxd := log idxd thread $(JSON_LIBS) accel
 
 # module/env_dpdk
diff --git a/module/accel/idxd/accel_engine_idxd.c b/module/accel/idxd/accel_engine_idxd.c
index 14c839806..f6a31d78d 100644
--- a/module/accel/idxd/accel_engine_idxd.c
+++ b/module/accel/idxd/accel_engine_idxd.c
@@ -46,10 +46,9 @@
 #include "spdk/util.h"
 #include "spdk/json.h"
 
-#define ALIGN_4K 0x1000
-
 static bool g_idxd_enable = false;
 uint32_t g_config_number;
+static uint32_t g_batch_max;
 
 enum channel_state {
 	IDXD_CHANNEL_ACTIVE,
@@ -73,35 +72,13 @@ struct idxd_device {
 static TAILQ_HEAD(, idxd_device) g_idxd_devices = TAILQ_HEAD_INITIALIZER(g_idxd_devices);
 static struct idxd_device *g_next_dev = NULL;
 
-struct idxd_op {
-	struct spdk_idxd_io_channel	*chan;
-	void				*cb_arg;
-	spdk_idxd_req_cb		cb_fn;
-	void				*src;
-	union {
-		void			*dst;
-		void			*src2;
-	};
-	void				*dst2;
-	uint32_t			seed;
-	uint64_t			fill_pattern;
-	uint32_t			op_code;
-	uint64_t			nbytes;
-	struct idxd_batch		*batch;
-	TAILQ_ENTRY(idxd_op)		link;
-};
-
 struct idxd_io_channel {
 	struct spdk_idxd_io_channel	*chan;
 	struct spdk_idxd_device		*idxd;
 	struct idxd_device		*dev;
 	enum channel_state		state;
 	struct spdk_poller		*poller;
-	TAILQ_HEAD(, idxd_op)		queued_ops;
-};
-
-struct idxd_task {
-	spdk_accel_completion_cb	cb;
+	TAILQ_HEAD(, spdk_accel_task)	queued_tasks;
 };
 
 pthread_mutex_t g_configuration_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -123,12 +100,174 @@ idxd_select_device(void)
 	return g_next_dev;
 }
 
+/* IDXD completion callback; cb_arg is the accel task given to the submit/prep call. */
+static void
+idxd_done(void *cb_arg, int status)
+{
+	/* Report the finished task and its status via spdk_accel_task_complete(). */
+	spdk_accel_task_complete(cb_arg, status);
+}
+
+/*
+ * Submit a single task straight to the IDXD channel behind ch, dispatching on its
+ * op_code. Returns whatever the matching spdk_idxd_submit_*() call returns, or -EINVAL
+ * for an op_code that has no case below.
+ */
+static int
+_process_single_task(struct spdk_io_channel *ch, struct spdk_accel_task *task)
+{
+	struct idxd_io_channel *io_ch = spdk_io_channel_get_ctx(ch);
+	struct spdk_idxd_io_channel *idxd_chan = io_ch->chan;
+
+	switch (task->op_code) {
+	case ACCEL_OPCODE_MEMMOVE:
+		return spdk_idxd_submit_copy(idxd_chan, task->dst, task->src, task->nbytes,
+					     idxd_done, task);
+	case ACCEL_OPCODE_DUALCAST:
+		return spdk_idxd_submit_dualcast(idxd_chan, task->dst, task->dst2, task->src,
+						 task->nbytes, idxd_done, task);
+	case ACCEL_OPCODE_COMPARE:
+		return spdk_idxd_submit_compare(idxd_chan, task->src, task->src2, task->nbytes,
+						idxd_done, task);
+	case ACCEL_OPCODE_MEMFILL:
+		return spdk_idxd_submit_fill(idxd_chan, task->dst, task->fill_pattern,
+					     task->nbytes, idxd_done, task);
+	case ACCEL_OPCODE_CRC32C:
+		return spdk_idxd_submit_crc32c(idxd_chan, task->dst, task->src, task->seed,
+					       task->nbytes, idxd_done, task);
+	default:
+		/* No case for this opcode: assert when enabled, otherwise reject it. */
+		assert(false);
+		return -EINVAL;
+	}
+}
+
+/*
+ * submit_tasks engine callback: submit the task list headed by first_task. One task goes
+ * straight to the device; longer lists are packed into IDXD batches of up to g_batch_max
+ * tasks. Tasks that cannot be submitted now are parked on chan->queued_tasks (retried from
+ * idxd_poll()); failed tasks are completed with their error code. Always returns 0.
+ */
+static int
+idxd_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *first_task)
+{
+	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
+	struct spdk_accel_task *task = first_task, *tmp, *batch_task;
+	struct idxd_batch *idxd_batch;
+	TAILQ_HEAD(, spdk_accel_task) batch_tasks;
+	int rc = 0;
+	uint32_t task_count = 0;
+
+	if (chan->state == IDXD_CHANNEL_PAUSED) {
+		goto queue_tasks;
+	} else if (chan->state == IDXD_CHANNEL_ERROR) {
+		while (task) {
+			tmp = TAILQ_NEXT(task, link);
+			spdk_accel_task_complete(task, -EINVAL);
+			task = tmp;
+		}
+		return 0;
+	}
+
+	/* If this is just a single task handle it here. */
+	if (!TAILQ_NEXT(task, link)) {
+		rc = _process_single_task(ch, task);
+		if (rc == -EBUSY) {
+			goto queue_tasks;
+		} else if (rc) {
+			spdk_accel_task_complete(task, rc);
+		}
+		return 0;
+	}
+
+	/* More than one task, create IDXD batch(es). */
+	do {
+		idxd_batch = spdk_idxd_batch_create(chan->chan);
+		task_count = 0;
+		if (idxd_batch == NULL) {
+			/* Queue them all and try again later */
+			goto queue_tasks;
+		}
+
+		/* Keep track of each batch's tasks in case we need to cancel. */
+		TAILQ_INIT(&batch_tasks);
+		do {
+			switch (task->op_code) {
+			case ACCEL_OPCODE_MEMMOVE:
+				rc = spdk_idxd_batch_prep_copy(chan->chan, idxd_batch, task->dst, task->src, task->nbytes,
+							       idxd_done, task);
+				break;
+			case ACCEL_OPCODE_DUALCAST:
+				rc = spdk_idxd_batch_prep_dualcast(chan->chan, idxd_batch, task->dst, task->dst2,
+								   task->src, task->nbytes, idxd_done, task);
+				break;
+			case ACCEL_OPCODE_COMPARE:
+				rc = spdk_idxd_batch_prep_compare(chan->chan, idxd_batch, task->src, task->src2,
+								  task->nbytes, idxd_done, task);
+				break;
+			case ACCEL_OPCODE_MEMFILL:
+				rc = spdk_idxd_batch_prep_fill(chan->chan, idxd_batch, task->dst, task->fill_pattern,
+							       task->nbytes, idxd_done, task);
+				break;
+			case ACCEL_OPCODE_CRC32C:
+				rc = spdk_idxd_batch_prep_crc32c(chan->chan, idxd_batch, task->dst, task->src,
+								 task->seed, task->nbytes, idxd_done, task);
+				break;
+			default:
+				assert(false);
+				/* Release builds: fail the task, don't reuse a stale rc. */
+				rc = -EINVAL;
+				break;
+			}
+			tmp = TAILQ_NEXT(task, link);
+			if (rc == 0) {
+				TAILQ_INSERT_TAIL(&batch_tasks, task, link);
+			} else {
+				assert(rc != -EBUSY);
+				spdk_accel_task_complete(task, rc);
+			}
+			task_count++;
+			task = tmp;
+		} while (task && task_count < g_batch_max);
+
+		if (!TAILQ_EMPTY(&batch_tasks)) {
+			rc = spdk_idxd_batch_submit(chan->chan, idxd_batch, NULL, NULL);
+			/* If we can't submit the batch, just destroy it and queue up all the operations
+			 * from the latest batch and try again later. If this list was from an accel_fw batch,
+			 * all of the batch info is still associated with the tasks that we're about to
+			 * queue up so nothing is lost. */
+			if (rc) {
+				spdk_idxd_batch_cancel(chan->chan, idxd_batch);
+				while (!TAILQ_EMPTY(&batch_tasks)) {
+					batch_task = TAILQ_FIRST(&batch_tasks);
+					TAILQ_REMOVE(&batch_tasks, batch_task, link);
+					TAILQ_INSERT_TAIL(&chan->queued_tasks, batch_task, link);
+				}
+				rc = 0;
+			}
+		} else {
+			/* All tasks had their cb_fn called; release the unused batch. */
+			spdk_idxd_batch_cancel(chan->chan, idxd_batch);
+			rc = 0;
+		}
+	} while (task && rc == 0);
+
+	return 0;
+
+queue_tasks:
+	while (task != NULL) {
+		tmp = TAILQ_NEXT(task, link);
+		TAILQ_INSERT_TAIL(&chan->queued_tasks, task, link);
+		task = tmp;
+	}
+	return 0;
+}
+
 static int
 idxd_poll(void *arg)
 {
 	struct idxd_io_channel *chan = arg;
-	struct idxd_op *op = NULL;
-	int rc;
+	struct spdk_accel_task *task = NULL;
 
 	spdk_idxd_process_events(chan->chan);
 
@@ -137,45 +276,13 @@ idxd_poll(void *arg)
 		return -1;
 	}
 
-	while (!TAILQ_EMPTY(&chan->queued_ops)) {
-		op = TAILQ_FIRST(&chan->queued_ops);
+	/* Submit queued tasks */
+	if (!TAILQ_EMPTY(&chan->queued_tasks)) {
+		task = TAILQ_FIRST(&chan->queued_tasks);
 
-		switch (op->op_code) {
-		case IDXD_OPCODE_MEMMOVE:
-			rc = spdk_idxd_submit_copy(op->chan, op->dst, op->src, op->nbytes,
-						   op->cb_fn, op->cb_arg);
-			break;
-		case IDXD_OPCODE_DUALCAST:
-			rc = spdk_idxd_submit_dualcast(op->chan, op->dst, op->dst2, op->src, op->nbytes,
-						       op->cb_fn, op->cb_arg);
-			break;
-		case IDXD_OPCODE_COMPARE:
-			rc = spdk_idxd_submit_compare(op->chan, op->src, op->src2, op->nbytes,
-						      op->cb_fn, op->cb_arg);
-			break;
-		case IDXD_OPCODE_MEMFILL:
-			rc = spdk_idxd_submit_fill(op->chan, op->dst, op->fill_pattern, op->nbytes,
-						   op->cb_fn, op->cb_arg);
-			break;
-		case IDXD_OPCODE_CRC32C_GEN:
-			rc = spdk_idxd_submit_crc32c(op->chan, op->dst, op->src, op->seed, op->nbytes,
-						     op->cb_fn, op->cb_arg);
-			break;
-		case IDXD_OPCODE_BATCH:
-			rc = spdk_idxd_batch_submit(op->chan, op->batch, op->cb_fn, op->cb_arg);
-			break;
-		default:
-			/* Should never get here */
-			assert(false);
-			break;
-		}
-		if (rc == 0) {
-			TAILQ_REMOVE(&chan->queued_ops, op, link);
-			free(op);
-		} else {
-			/* Busy, resubmit to try again later */
-			break;
-		}
+		TAILQ_INIT(&chan->queued_tasks);
+
+		idxd_submit_tasks(task->accel_ch->engine_ch, task);
 	}
 
 	return -1;
@@ -184,403 +291,27 @@ idxd_poll(void *arg)
 static size_t
 accel_engine_idxd_get_ctx_size(void)
 {
-	return sizeof(struct idxd_task) + sizeof(struct spdk_accel_task);
-}
-
-static void
-idxd_done(void *cb_arg, int status)
-{
-	struct spdk_accel_task *accel_task;
-	struct idxd_task *idxd_task = cb_arg;
-
-	accel_task = SPDK_CONTAINEROF(idxd_task, struct spdk_accel_task,
-				      offload_ctx);
-
-	idxd_task->cb(accel_task, status);
-}
-
-static struct idxd_op *
-_prep_queue_command(struct idxd_io_channel *chan, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_op *op_to_queue;
-
-	op_to_queue = calloc(1, sizeof(struct idxd_op));
-	if (op_to_queue == NULL) {
-		SPDK_ERRLOG("Failed to allocate operation for queueing\n");
-		return NULL;
-	}
-
-	op_to_queue->chan = chan->chan;
-	op_to_queue->cb_fn = cb_fn;
-	op_to_queue->cb_arg = cb_arg;
-
-	return op_to_queue;
-}
-
-static int
-idxd_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes,
-		 spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	int rc = 0;
-
-	idxd_task->cb = cb_fn;
-
-	if (chan->state == IDXD_CHANNEL_ACTIVE) {
-		rc = spdk_idxd_submit_copy(chan->chan, dst, src, nbytes, idxd_done, idxd_task);
-	}
-
-	if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
-		struct idxd_op *op_to_queue;
-
-		/* Commpom prep. */
-		op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
-		if (op_to_queue == NULL) {
-			return -ENOMEM;
-		}
-
-		/* Command specific. */
-		op_to_queue->dst = dst;
-		op_to_queue->src = src;
-		op_to_queue->nbytes = nbytes;
-		op_to_queue->op_code = IDXD_OPCODE_MEMMOVE;
-
-		/* Queue the operation. */
-		TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
-		return 0;
-
-	} else if (chan->state == IDXD_CHANNEL_ERROR) {
-		return -EINVAL;
-	}
-
-	return rc;
-}
-
-static int
-idxd_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src,
-		     uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	int rc = 0;
-
-	idxd_task->cb = cb_fn;
-
-	if (chan->state == IDXD_CHANNEL_ACTIVE) {
-		rc = spdk_idxd_submit_dualcast(chan->chan, dst1, dst2, src, nbytes, idxd_done, idxd_task);
-	}
-
-	if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
-		struct idxd_op *op_to_queue;
-
-		/* Commpom prep. */
-		op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
-		if (op_to_queue == NULL) {
-			return -ENOMEM;
-		}
-
-		/* Command specific. */
-		op_to_queue->dst = dst1;
-		op_to_queue->dst2 = dst2;
-		op_to_queue->src = src;
-		op_to_queue->nbytes = nbytes;
-		op_to_queue->op_code = IDXD_OPCODE_DUALCAST;
-
-		/* Queue the operation. */
-		TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
-		return 0;
-
-	} else if (chan->state == IDXD_CHANNEL_ERROR) {
-		return -EINVAL;
-	}
-
-	return rc;
-}
-
-static int
-idxd_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2,
-		    uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	int rc = 0;
-
-	idxd_task->cb = cb_fn;
-
-	if (chan->state == IDXD_CHANNEL_ACTIVE) {
-		rc = spdk_idxd_submit_compare(chan->chan, src1, src2, nbytes, idxd_done, idxd_task);
-	}
-
-	if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
-		struct idxd_op *op_to_queue;
-
-		/* Commpom prep. */
-		op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
-		if (op_to_queue == NULL) {
-			return -ENOMEM;
-		}
-
-		/* Command specific. */
-		op_to_queue->src = src1;
-		op_to_queue->src2 = src2;
-		op_to_queue->nbytes = nbytes;
-		op_to_queue->op_code = IDXD_OPCODE_COMPARE;
-
-		/* Queue the operation. */
-		TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
-		return 0;
-
-	} else if (chan->state == IDXD_CHANNEL_ERROR) {
-		return -EINVAL;
-	}
-
-	return rc;
-}
-
-static int
-idxd_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill,
-		 uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	int rc = 0;
-	uint64_t fill_pattern;
-
-	idxd_task->cb = cb_fn;
-	memset(&fill_pattern, fill, sizeof(uint64_t));
-
-	if (chan->state == IDXD_CHANNEL_ACTIVE) {
-		rc = spdk_idxd_submit_fill(chan->chan, dst, fill_pattern, nbytes, idxd_done, idxd_task);
-	}
-
-	if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
-		struct idxd_op *op_to_queue;
-
-		/* Commpom prep. */
-		op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
-		if (op_to_queue == NULL) {
-			return -ENOMEM;
-		}
-
-		/* Command specific. */
-		op_to_queue->dst = dst;
-		op_to_queue->fill_pattern = fill_pattern;
-		op_to_queue->nbytes = nbytes;
-		op_to_queue->op_code = IDXD_OPCODE_MEMFILL;
-
-		/* Queue the operation. */
-		TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
-		return 0;
-
-	} else if (chan->state == IDXD_CHANNEL_ERROR) {
-		return -EINVAL;
-	}
-
-	return rc;
-}
-
-static int
-idxd_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src,
-		   uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	int rc = 0;
-
-	idxd_task->cb = cb_fn;
-
-	if (chan->state == IDXD_CHANNEL_ACTIVE) {
-		rc = spdk_idxd_submit_crc32c(chan->chan, dst, src, seed, nbytes, idxd_done, idxd_task);
-	}
-
-	if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
-		struct idxd_op *op_to_queue;
-
-		/* Commpom prep. */
-		op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
-		if (op_to_queue == NULL) {
-			return -ENOMEM;
-		}
-
-		/* Command specific. */
-		op_to_queue->dst = dst;
-		op_to_queue->src = src;
-		op_to_queue->seed = seed;
-		op_to_queue->nbytes = nbytes;
-		op_to_queue->op_code = IDXD_OPCODE_CRC32C_GEN;
-
-		/* Queue the operation. */
-		TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
-		return 0;
-
-	} else if (chan->state == IDXD_CHANNEL_ERROR) {
-		return -EINVAL;
-	}
-
-	return rc;
+	return 0;
 }
 
 static uint64_t
 idxd_get_capabilities(void)
 {
 	return ACCEL_COPY | ACCEL_FILL | ACCEL_CRC32C | ACCEL_COMPARE |
-	       ACCEL_DUALCAST | ACCEL_BATCH;
+	       ACCEL_DUALCAST;
 }
 
 static uint32_t
-idxd_batch_get_max(void)
+idxd_batch_get_max(struct spdk_io_channel *ch)
 {
 	return spdk_idxd_batch_get_max();
 }
 
-static struct spdk_accel_batch *
-idxd_batch_start(struct spdk_io_channel *ch)
-{
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-
-	return (struct spdk_accel_batch *)spdk_idxd_batch_create(chan->chan);
-}
-
-static int
-idxd_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch)
-{
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	struct idxd_batch *batch = (struct idxd_batch *)_batch;
-
-	return spdk_idxd_batch_cancel(chan->chan, batch);
-}
-
-static int
-idxd_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
-		  spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	struct idxd_batch *batch = (struct idxd_batch *)_batch;
-	int rc = 0;
-
-	idxd_task->cb = cb_fn;
-
-	if (chan->state == IDXD_CHANNEL_ACTIVE) {
-		rc = spdk_idxd_batch_submit(chan->chan, batch, idxd_done, idxd_task);
-	}
-
-	if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
-		struct idxd_op *op_to_queue;
-
-		/* Commpom prep. */
-		op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
-		if (op_to_queue == NULL) {
-			return -ENOMEM;
-		}
-
-		/* Command specific. */
-		op_to_queue->batch = batch;
-		op_to_queue->op_code = IDXD_OPCODE_BATCH;
-
-		/* Queue the operation. */
-		TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
-		return 0;
-
-	} else if (chan->state == IDXD_CHANNEL_ERROR) {
-		return -EINVAL;
-	}
-
-	return rc;
-}
-
-static int
-idxd_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
-		     void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	struct idxd_batch *batch = (struct idxd_batch *)_batch;
-
-	idxd_task->cb = cb_fn;
-
-	return spdk_idxd_batch_prep_copy(chan->chan, batch, dst, src, nbytes,
-					 idxd_done, idxd_task);
-}
-
-static int
-idxd_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
-		     void *dst, uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	uint64_t fill_pattern;
-	struct idxd_batch *batch = (struct idxd_batch *)_batch;
-
-	idxd_task->cb = cb_fn;
-	memset(&fill_pattern, fill, sizeof(uint64_t));
-
-	return spdk_idxd_batch_prep_fill(chan->chan, batch, dst, fill_pattern, nbytes, idxd_done,
-					 idxd_task);
-}
-
-static int
-idxd_batch_prep_dualcast(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
-			 void *dst1, void *dst2, void *src, uint64_t nbytes,
-			 spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	struct idxd_batch *batch = (struct idxd_batch *)_batch;
-
-	idxd_task->cb = cb_fn;
-
-	return spdk_idxd_batch_prep_dualcast(chan->chan, batch, dst1, dst2, src, nbytes, idxd_done,
-					     idxd_task);
-}
-
-static int
-idxd_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
-		       uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
-		       spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	struct idxd_batch *batch = (struct idxd_batch *)_batch;
-
-	idxd_task->cb = cb_fn;
-
-	return spdk_idxd_batch_prep_crc32c(chan->chan, batch, dst, src, seed, nbytes, idxd_done,
-					   idxd_task);
-}
-
-static int
-idxd_batch_prep_compare(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
-			void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
-	struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
-	struct idxd_batch *batch = (struct idxd_batch *)_batch;
-
-	idxd_task->cb = cb_fn;
-
-	return spdk_idxd_batch_prep_compare(chan->chan, batch, src1, src2, nbytes, idxd_done,
-					    idxd_task);
-}
-
 static struct spdk_accel_engine idxd_accel_engine = {
 	.get_capabilities	= idxd_get_capabilities,
-	.copy			= idxd_submit_copy,
-	.batch_get_max		= idxd_batch_get_max,
-	.batch_create		= idxd_batch_start,
-	.batch_cancel		= idxd_batch_cancel,
-	.batch_prep_copy	= idxd_batch_prep_copy,
-	.batch_prep_fill	= idxd_batch_prep_fill,
-	.batch_prep_dualcast	= idxd_batch_prep_dualcast,
-	.batch_prep_crc32c	= idxd_batch_prep_crc32c,
-	.batch_prep_compare	= idxd_batch_prep_compare,
-	.batch_submit		= idxd_batch_submit,
-	.dualcast		= idxd_submit_dualcast,
-	.compare		= idxd_submit_compare,
-	.fill			= idxd_submit_fill,
-	.crc32c			= idxd_submit_crc32c,
 	.get_io_channel		= idxd_get_io_channel,
+	.batch_get_max		= idxd_batch_get_max,
+	.submit_tasks		= idxd_submit_tasks,
 };
 
 /*
@@ -652,7 +383,7 @@ idxd_create_cb(void *io_device, void *ctx_buf)
 
 	chan->dev = dev;
 	chan->poller = spdk_poller_register(idxd_poll, chan, 0);
-	TAILQ_INIT(&chan->queued_ops);
+	TAILQ_INIT(&chan->queued_tasks);
 
 	/*
 	 * Configure the channel but leave paused until all others
@@ -792,6 +523,7 @@ accel_engine_idxd_init(void)
 	}
 
 	g_idxd_initialized = true;
+	g_batch_max = spdk_idxd_batch_get_max();
 	SPDK_NOTICELOG("Accel engine updated to use IDXD DSA engine.\n");
 	spdk_accel_hw_engine_register(&idxd_accel_engine);
 	spdk_io_device_register(&idxd_accel_engine, idxd_create_cb, idxd_destroy_cb,
diff --git a/module/accel/ioat/accel_engine_ioat.c b/module/accel/ioat/accel_engine_ioat.c
index ba00ba3b8..a6c82c6f8 100644
--- a/module/accel/ioat/accel_engine_ioat.c
+++ b/module/accel/ioat/accel_engine_ioat.c
@@ -42,36 +42,7 @@
 #include "spdk/event.h"
 #include "spdk/thread.h"
 #include "spdk/ioat.h"
-#include "spdk/crc32.h"
 
-#define ALIGN_4K 0x1000
-
-enum ioat_accel_opcode {
-	IOAT_ACCEL_OPCODE_MEMMOVE	= 0,
-	IOAT_ACCEL_OPCODE_MEMFILL	= 1,
-	IOAT_ACCEL_OPCODE_COMPARE	= 2,
-	IOAT_ACCEL_OPCODE_CRC32C	= 3,
-	IOAT_ACCEL_OPCODE_DUALCAST	= 4,
-};
-
-struct ioat_accel_op {
-	struct ioat_io_channel		*ioat_ch;
-	void				*cb_arg;
-	spdk_accel_completion_cb	cb_fn;
-	void				*src;
-	union {
-		void			*dst;
-		void			*src2;
-	};
-	void				*dst2;
-	uint32_t			seed;
-	uint64_t			fill_pattern;
-	enum ioat_accel_opcode		op_code;
-	uint64_t			nbytes;
-	TAILQ_ENTRY(ioat_accel_op)	link;
-};
-
-static int g_batch_size;
 static bool g_ioat_enable = false;
 static bool g_ioat_initialized = false;
 
@@ -103,9 +74,6 @@ struct ioat_io_channel {
 	struct spdk_ioat_chan		*ioat_ch;
 	struct ioat_device		*ioat_dev;
 	struct spdk_poller		*poller;
-	TAILQ_HEAD(, ioat_accel_op)	op_pool;
-	TAILQ_HEAD(, ioat_accel_op)	sw_batch; /* for operations not hw accelerated */
-	bool				hw_batch; /* for operations that are hw accelerated */
 };
 
 static int
@@ -149,17 +117,13 @@ ioat_free_device(struct ioat_device *dev)
 	pthread_mutex_unlock(&g_ioat_mutex);
 }
 
-struct ioat_task {
-	spdk_accel_completion_cb	cb;
-};
-
 static int accel_engine_ioat_init(void);
 static void accel_engine_ioat_exit(void *ctx);
 
 static size_t
 accel_engine_ioat_get_ctx_size(void)
 {
-	return sizeof(struct ioat_task) + sizeof(struct spdk_accel_task);
+	return 0;
 }
 
 SPDK_ACCEL_MODULE_REGISTER(accel_engine_ioat_init, accel_engine_ioat_exit,
@@ -168,43 +132,9 @@ SPDK_ACCEL_MODULE_REGISTER(accel_engine_ioat_init, accel_engine_ioat_exit,
 static void
 ioat_done(void *cb_arg)
 {
-	struct spdk_accel_task *accel_task;
-	struct ioat_task *ioat_task = cb_arg;
+	struct spdk_accel_task *accel_task = cb_arg;
 
-	accel_task = (struct spdk_accel_task *)
-		     ((uintptr_t)ioat_task -
-		      offsetof(struct spdk_accel_task, offload_ctx));
-
-	ioat_task->cb(accel_task, 0);
-}
-
-static int
-ioat_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes,
-		 spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
-	struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
-
-	assert(ioat_ch->ioat_ch != NULL);
-
-	ioat_task->cb = cb_fn;
-
-	return spdk_ioat_submit_copy(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, src, nbytes);
-}
-
-static int
-ioat_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill,
-		 uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
-	struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
-	uint64_t fill64 = 0x0101010101010101ULL * fill;
-
-	assert(ioat_ch->ioat_ch != NULL);
-
-	ioat_task->cb = cb_fn;
-
-	return spdk_ioat_submit_fill(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, fill64, nbytes);
+	spdk_accel_task_complete(accel_task, 0);
 }
 
 static int
@@ -218,266 +148,62 @@ ioat_poll(void *arg)
 
 static struct spdk_io_channel *ioat_get_io_channel(void);
 
-/*
- * The IOAT engine only supports these capabilities as hardware
- * accelerated. The accel fw will handle unsupported functions
- * by calling the software implementations of the functions.
- */
 static uint64_t
 ioat_get_capabilities(void)
 {
-	return ACCEL_COPY | ACCEL_FILL | ACCEL_BATCH;
+	return ACCEL_COPY | ACCEL_FILL;
 }
 
-/* The IOAT batch functions exposed by the accel fw do not match up 1:1
- * with the functions in the IOAT library. The IOAT library directly only
- * supports construction of accelerated functions via the IOAT native
- * interface.  The accel_fw batch capabilities are implemented here in the
- * plug-in and rely on either the IOAT library for accelerated commands
- * or software functions for non-accelerated.
- */
 static uint32_t
-ioat_batch_get_max(void)
-{
-	return g_batch_size;
-}
-
-static struct spdk_accel_batch *
-ioat_batch_create(struct spdk_io_channel *ch)
+ioat_batch_get_max(struct spdk_io_channel *ch)
 {
 	struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
 
-	if (!TAILQ_EMPTY(&ioat_ch->sw_batch) || (ioat_ch->hw_batch == true)) {
-		SPDK_ERRLOG("IOAT accel engine only supports one batch at a time.\n");
-		return NULL;
-	}
-
-	return (struct spdk_accel_batch *)&ioat_ch->hw_batch;
-}
-
-static struct ioat_accel_op *
-_prep_op(struct ioat_io_channel *ioat_ch, struct spdk_accel_batch *batch,
-	 spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct ioat_accel_op *op;
-
-	if ((struct spdk_accel_batch *)&ioat_ch->hw_batch != batch) {
-		SPDK_ERRLOG("Invalid batch\n");
-		return NULL;
-	}
-
-	if (!TAILQ_EMPTY(&ioat_ch->op_pool)) {
-		op = TAILQ_FIRST(&ioat_ch->op_pool);
-		TAILQ_REMOVE(&ioat_ch->op_pool, op, link);
-	} else {
-		SPDK_ERRLOG("Ran out of operations for batch\n");
-		return NULL;
-	}
-
-	op->cb_arg = cb_arg;
-	op->cb_fn = cb_fn;
-	op->ioat_ch = ioat_ch;
-
-	return op;
+	return spdk_ioat_get_max_descriptors(ioat_ch->ioat_dev->ioat);
 }
 
 static int
-ioat_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-		     void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
+ioat_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *accel_task)
 {
 	struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
-	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
+	struct spdk_accel_task *tmp;
+	int rc = 0;
 
-	ioat_task->cb = cb_fn;
-	ioat_ch->hw_batch = true;
-
-	/* Call the IOAT library prep function. */
-	return spdk_ioat_build_copy(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, src, nbytes);
-}
-
-static int
-ioat_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst,
-		     uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
-	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
-	uint64_t fill_pattern;
-
-	ioat_task->cb = cb_fn;
-	ioat_ch->hw_batch = true;
-	memset(&fill_pattern, fill, sizeof(uint64_t));
-
-	/* Call the IOAT library prep function. */
-	return spdk_ioat_build_fill(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, fill_pattern, nbytes);
-}
-
-static int
-ioat_batch_prep_dualcast(struct spdk_io_channel *ch,
-			 struct spdk_accel_batch *batch, void *dst1, void *dst2,
-			 void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct ioat_accel_op *op;
-	struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
-
-	if ((uintptr_t)dst1 & (ALIGN_4K - 1) || (uintptr_t)dst2 & (ALIGN_4K - 1)) {
-		SPDK_ERRLOG("Dualcast requires 4K alignment on dst addresses\n");
-		return -EINVAL;
-	}
-
-	op = _prep_op(ioat_ch, batch, cb_fn, cb_arg);
-	if (op == NULL) {
-		return -EINVAL;
-	}
-
-	/* Command specific. */
-	op->src = src;
-	op->dst = dst1;
-	op->dst2 = dst2;
-	op->nbytes = nbytes;
-	op->op_code = IOAT_ACCEL_OPCODE_DUALCAST;
-	TAILQ_INSERT_TAIL(&ioat_ch->sw_batch, op, link);
-
-	return 0;
-}
-
-static int
-ioat_batch_prep_compare(struct spdk_io_channel *ch,
-			struct spdk_accel_batch *batch, void *src1,
-			void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct ioat_accel_op *op;
-	struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
-
-	op = _prep_op(ioat_ch, batch, cb_fn, cb_arg);
-	if (op == NULL) {
-		return -EINVAL;
-	}
-
-	/* Command specific. */
-	op->src = src1;
-	op->src2 = src2;
-	op->nbytes = nbytes;
-	op->op_code = IOAT_ACCEL_OPCODE_COMPARE;
-	TAILQ_INSERT_TAIL(&ioat_ch->sw_batch, op, link);
-
-	return 0;
-}
-
-static int
-ioat_batch_prep_crc32c(struct spdk_io_channel *ch,
-		       struct spdk_accel_batch *batch, uint32_t *dst, void *src,
-		       uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct ioat_accel_op *op;
-	struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
-
-	op = _prep_op(ioat_ch, batch, cb_fn, cb_arg);
-	if (op == NULL) {
-		return -EINVAL;
-	}
-
-	/* Command specific. */
-	op->dst = (void *)dst;
-	op->src = src;
-	op->seed = seed;
-	op->nbytes = nbytes;
-	op->op_code = IOAT_ACCEL_OPCODE_CRC32C;
-	TAILQ_INSERT_TAIL(&ioat_ch->sw_batch, op, link);
-
-	return 0;
-}
-
-static int
-ioat_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *batch)
-{
-	struct ioat_accel_op *op;
-	struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
-
-	if ((struct spdk_accel_batch *)&ioat_ch->hw_batch != batch) {
-		SPDK_ERRLOG("Invalid batch\n");
-		return -EINVAL;
-	}
-
-	/* Flush the batched HW items, there's no way to cancel these without resetting. */
-	spdk_ioat_flush(ioat_ch->ioat_ch);
-	ioat_ch->hw_batch = false;
-
-	/* Return batched software items to the pool. */
-	while ((op = TAILQ_FIRST(&ioat_ch->sw_batch))) {
-		TAILQ_REMOVE(&ioat_ch->sw_batch, op, link);
-		TAILQ_INSERT_TAIL(&ioat_ch->op_pool, op, link);
-	}
-
-	return 0;
-}
-
-static int
-ioat_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
-		  spdk_accel_completion_cb cb_fn, void *cb_arg)
-{
-	struct ioat_accel_op *op;
-	struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
-	struct spdk_accel_task *accel_task;
-	int batch_status = 0, cmd_status = 0;
-
-	if ((struct spdk_accel_batch *)&ioat_ch->hw_batch != batch) {
-		SPDK_ERRLOG("Invalid batch\n");
-		return -EINVAL;
-	}
-
-	/* Flush the batched HW items first. */
-	spdk_ioat_flush(ioat_ch->ioat_ch);
-	ioat_ch->hw_batch = false;
-
-	/* Complete the batched software items. */
-	while ((op = TAILQ_FIRST(&ioat_ch->sw_batch))) {
-		TAILQ_REMOVE(&ioat_ch->sw_batch, op, link);
-		accel_task = (struct spdk_accel_task *)((uintptr_t)op->cb_arg -
-							offsetof(struct spdk_accel_task, offload_ctx));
-
-		switch (op->op_code) {
-		case IOAT_ACCEL_OPCODE_DUALCAST:
-			memcpy(op->dst, op->src, op->nbytes);
-			memcpy(op->dst2, op->src, op->nbytes);
+	do {
+		switch (accel_task->op_code) {
+		case ACCEL_OPCODE_MEMFILL:
+			rc = spdk_ioat_build_fill(ioat_ch->ioat_ch, accel_task, ioat_done,
+						  accel_task->dst, accel_task->fill_pattern, accel_task->nbytes);
 			break;
-		case IOAT_ACCEL_OPCODE_COMPARE:
-			cmd_status = memcmp(op->src, op->src2, op->nbytes);
-			break;
-		case IOAT_ACCEL_OPCODE_CRC32C:
-			*(uint32_t *)op->dst = spdk_crc32c_update(op->src, op->nbytes, ~op->seed);
+		case ACCEL_OPCODE_MEMMOVE:
+			rc = spdk_ioat_build_copy(ioat_ch->ioat_ch, accel_task, ioat_done,
+						  accel_task->dst, accel_task->src, accel_task->nbytes);
 			break;
 		default:
 			assert(false);
 			break;
 		}
 
-		batch_status |= cmd_status;
-		op->cb_fn(accel_task, cmd_status);
-		TAILQ_INSERT_TAIL(&ioat_ch->op_pool, op, link);
-	}
+		tmp = TAILQ_NEXT(accel_task, link);
 
-	/* Now complete the batch request itself. */
-	accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg -
-						offsetof(struct spdk_accel_task, offload_ctx));
-	cb_fn(accel_task, batch_status);
+		/* Report any build errors via the callback now. */
+		if (rc) {
+			spdk_accel_task_complete(accel_task, rc);
+		}
+
+		accel_task = tmp;
+	} while (accel_task);
+
+	spdk_ioat_flush(ioat_ch->ioat_ch);
 
 	return 0;
 }
 
 static struct spdk_accel_engine ioat_accel_engine = {
 	.get_capabilities	= ioat_get_capabilities,
-	.copy			= ioat_submit_copy,
-	.fill			= ioat_submit_fill,
-	.batch_get_max		= ioat_batch_get_max,
-	.batch_create		= ioat_batch_create,
-	.batch_cancel		= ioat_batch_cancel,
-	.batch_prep_copy	= ioat_batch_prep_copy,
-	.batch_prep_dualcast	= ioat_batch_prep_dualcast,
-	.batch_prep_compare	= ioat_batch_prep_compare,
-	.batch_prep_fill	= ioat_batch_prep_fill,
-	.batch_prep_crc32c	= ioat_batch_prep_crc32c,
-	.batch_submit		= ioat_batch_submit,
 	.get_io_channel		= ioat_get_io_channel,
+	.batch_get_max		= ioat_batch_get_max,
+	.submit_tasks		= ioat_submit_tasks,
 };
 
 static int
@@ -485,35 +211,16 @@ ioat_create_cb(void *io_device, void *ctx_buf)
 {
 	struct ioat_io_channel *ch = ctx_buf;
 	struct ioat_device *ioat_dev;
-	struct ioat_accel_op *op;
-	int i;
 
 	ioat_dev = ioat_allocate_device();
 	if (ioat_dev == NULL) {
 		return -1;
 	}
 
-	TAILQ_INIT(&ch->sw_batch);
-	ch->hw_batch = false;
-	TAILQ_INIT(&ch->op_pool);
-
-	g_batch_size = spdk_ioat_get_max_descriptors(ioat_dev->ioat);
-	for (i = 0 ; i < g_batch_size ; i++) {
-		op = calloc(1, sizeof(struct ioat_accel_op));
-		if (op == NULL) {
-			SPDK_ERRLOG("Failed to allocate operation for batch.\n");
-			while ((op = TAILQ_FIRST(&ch->op_pool))) {
-				TAILQ_REMOVE(&ch->op_pool, op, link);
-				free(op);
-			}
-			return -ENOMEM;
-		}
-		TAILQ_INSERT_TAIL(&ch->op_pool, op, link);
-	}
-
 	ch->ioat_dev = ioat_dev;
 	ch->ioat_ch = ioat_dev->ioat;
 	ch->poller = SPDK_POLLER_REGISTER(ioat_poll, ch->ioat_ch, 0);
+
 	return 0;
 }
 
@@ -521,12 +228,6 @@ static void
 ioat_destroy_cb(void *io_device, void *ctx_buf)
 {
 	struct ioat_io_channel *ch = ctx_buf;
-	struct ioat_accel_op *op;
-
-	while ((op = TAILQ_FIRST(&ch->op_pool))) {
-		TAILQ_REMOVE(&ch->op_pool, op, link);
-		free(op);
-	}
 
 	ioat_free_device(ch->ioat_dev);
 	spdk_poller_unregister(&ch->poller);