diff --git a/examples/ioat/perf/perf.c b/examples/ioat/perf/perf.c
index 5b306976f..8b4ff3ea3 100644
--- a/examples/ioat/perf/perf.c
+++ b/examples/ioat/perf/perf.c
@@ -51,6 +51,7 @@ struct user_config {
 	int time_in_sec;
 	bool verify;
 	char *core_mask;
+	int ioat_chan_num;
 };
 
 struct ioat_device {
@@ -63,30 +64,42 @@ static struct ioat_device *g_next_device;
 
 static struct user_config g_user_config;
 
-struct thread_entry {
+struct ioat_chan_entry {
 	struct spdk_ioat_chan *chan;
+	int ioat_chan_id;
 	uint64_t xfer_completed;
 	uint64_t xfer_failed;
 	uint64_t current_queue_depth;
-	unsigned lcore_id;
 	bool is_draining;
 	struct spdk_mempool *data_pool;
 	struct spdk_mempool *task_pool;
+	struct ioat_chan_entry *next;
+};
+
+struct worker_thread {
+	struct ioat_chan_entry 	*ctx;
+	struct worker_thread	*next;
+	unsigned		lcore;
 };
 
 struct ioat_task {
-	struct thread_entry *thread_entry;
+	struct ioat_chan_entry *ioat_chan_entry;
 	void *src;
 	void *dst;
 };
 
-static void submit_single_xfer(struct thread_entry *thread_entry, struct ioat_task *ioat_task,
+static struct worker_thread *g_workers = NULL;
+static int g_num_workers = 0;
+static int g_ioat_chan_num = 0;
+
+static void submit_single_xfer(struct ioat_chan_entry *ioat_chan_entry, struct ioat_task *ioat_task,
 			       void *dst, void *src);
 
 static void
 construct_user_config(struct user_config *self)
 {
 	self->xfer_size_bytes = 4096;
+	self->ioat_chan_num = 1;
 	self->queue_depth = 256;
 	self->time_in_sec = 10;
 	self->verify = false;
@@ -97,6 +110,7 @@ static void
 dump_user_config(struct user_config *self)
 {
 	printf("User configuration:\n");
+	printf("Number of channels:    %u\n", self->ioat_chan_num);
 	printf("Transfer size:  %u bytes\n", self->xfer_size_bytes);
 	printf("Queue depth:    %u\n", self->queue_depth);
 	printf("Run time:       %u seconds\n", self->time_in_sec);
@@ -123,22 +137,80 @@ static void
 ioat_done(void *cb_arg)
 {
 	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
-	struct thread_entry *thread_entry = ioat_task->thread_entry;
+	struct ioat_chan_entry *ioat_chan_entry = ioat_task->ioat_chan_entry;
 
 	if (g_user_config.verify && memcmp(ioat_task->src, ioat_task->dst, g_user_config.xfer_size_bytes)) {
-		thread_entry->xfer_failed++;
+		ioat_chan_entry->xfer_failed++;
 	} else {
-		thread_entry->xfer_completed++;
+		ioat_chan_entry->xfer_completed++;
 	}
 
-	thread_entry->current_queue_depth--;
+	ioat_chan_entry->current_queue_depth--;
 
-	if (thread_entry->is_draining) {
-		spdk_mempool_put(thread_entry->data_pool, ioat_task->src);
-		spdk_mempool_put(thread_entry->data_pool, ioat_task->dst);
-		spdk_mempool_put(thread_entry->task_pool, ioat_task);
+	if (ioat_chan_entry->is_draining) {
+		spdk_mempool_put(ioat_chan_entry->data_pool, ioat_task->src);
+		spdk_mempool_put(ioat_chan_entry->data_pool, ioat_task->dst);
+		spdk_mempool_put(ioat_chan_entry->task_pool, ioat_task);
 	} else {
-		submit_single_xfer(thread_entry, ioat_task, ioat_task->dst, ioat_task->src);
+		submit_single_xfer(ioat_chan_entry, ioat_task, ioat_task->dst, ioat_task->src);
+	}
+}
+
+static int
+register_workers(void)
+{
+	unsigned lcore;
+	struct worker_thread *worker;
+	struct worker_thread *prev_worker;
+
+	worker = malloc(sizeof(struct worker_thread));
+	if (worker == NULL) {
+		perror("worker_thread malloc");
+		return -1;
+	}
+
+	memset(worker, 0, sizeof(struct worker_thread));
+	worker->lcore = rte_get_master_lcore();
+
+	g_workers = worker;
+	g_num_workers = 1;
+
+	RTE_LCORE_FOREACH_SLAVE(lcore) {
+		prev_worker = worker;
+		worker = malloc(sizeof(struct worker_thread));
+		if (worker == NULL) {
+			perror("worker_thread malloc");
+			return -1;
+		}
+
+		memset(worker, 0, sizeof(struct worker_thread));
+		worker->lcore = lcore;
+		prev_worker->next = worker;
+		g_num_workers++;
+	}
+
+	return 0;
+}
+
+static void
+unregister_workers(void)
+{
+	struct worker_thread *worker = g_workers;
+	struct ioat_chan_entry *entry, *entry1;
+
+	/* Free ioat_chan_entry and worker thread */
+	while (worker) {
+		struct worker_thread *next_worker = worker->next;
+		entry = worker->ctx;
+		while (entry) {
+			entry1 = entry->next;
+			spdk_mempool_free(entry->data_pool);
+			spdk_mempool_free(entry->task_pool);
+			free(entry);
+			entry = entry1;
+		}
+		free(worker);
+		worker = next_worker;
 	}
 }
 
@@ -159,6 +231,10 @@ attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_ioat_chan *
 {
 	struct ioat_device *dev;
 
+	if (g_ioat_chan_num >= g_user_config.ioat_chan_num) {
+		return;
+	}
+
 	dev = spdk_zmalloc(sizeof(*dev), 0, NULL);
 	if (dev == NULL) {
 		printf("Failed to allocate device struct\n");
@@ -166,6 +242,7 @@ attach_cb(void *cb_ctx, struct spdk_pci_device *pci_dev, struct spdk_ioat_chan *
 	}
 
 	dev->ioat = ioat;
+	g_ioat_chan_num++;
 	TAILQ_INSERT_TAIL(&g_devices, dev, tailq);
 }
 
@@ -189,6 +266,7 @@ usage(char *program_name)
 	printf("\t[-h help message]\n");
 	printf("\t[-c core mask for distributing I/O submission/completion work]\n");
 	printf("\t[-q queue depth]\n");
+	printf("\t[-n number of channels]\n");
 	printf("\t[-s transfer size in bytes]\n");
 	printf("\t[-t time in seconds]\n");
 	printf("\t[-v verify copy result if this switch is on]\n");
@@ -200,11 +278,14 @@ parse_args(int argc, char **argv)
 	int op;
 
 	construct_user_config(&g_user_config);
-	while ((op = getopt(argc, argv, "c:hq:s:t:v")) != -1) {
+	while ((op = getopt(argc, argv, "c:hn:q:s:t:v")) != -1) {
 		switch (op) {
 		case 's':
 			g_user_config.xfer_size_bytes = atoi(optarg);
 			break;
+		case 'n':
+			g_user_config.ioat_chan_num = atoi(optarg);
+			break;
 		case 'q':
 			g_user_config.queue_depth = atoi(optarg);
 			break;
@@ -226,7 +307,8 @@ parse_args(int argc, char **argv)
 		}
 	}
 	if (!g_user_config.xfer_size_bytes || !g_user_config.queue_depth ||
-	    !g_user_config.time_in_sec || !g_user_config.core_mask) {
+	    !g_user_config.time_in_sec || !g_user_config.core_mask ||
+	    !g_user_config.ioat_chan_num) {
 		usage(argv[0]);
 		return 1;
 	}
@@ -235,75 +317,79 @@ parse_args(int argc, char **argv)
 }
 
 static void
-drain_io(struct thread_entry *thread_entry)
+drain_io(struct ioat_chan_entry *ioat_chan_entry)
 {
-	while (thread_entry->current_queue_depth > 0) {
-		spdk_ioat_process_events(thread_entry->chan);
+	while (ioat_chan_entry->current_queue_depth > 0) {
+		spdk_ioat_process_events(ioat_chan_entry->chan);
 	}
 }
 
 static void
-submit_single_xfer(struct thread_entry *thread_entry, struct ioat_task *ioat_task, void *dst,
+submit_single_xfer(struct ioat_chan_entry *ioat_chan_entry, struct ioat_task *ioat_task, void *dst,
 		   void *src)
 {
-	ioat_task->thread_entry = thread_entry;
+	ioat_task->ioat_chan_entry = ioat_chan_entry;
 	ioat_task->src = src;
 	ioat_task->dst = dst;
 
-	spdk_ioat_submit_copy(thread_entry->chan, ioat_task, ioat_done, dst, src,
+	spdk_ioat_submit_copy(ioat_chan_entry->chan, ioat_task, ioat_done, dst, src,
 			      g_user_config.xfer_size_bytes);
 
-	thread_entry->current_queue_depth++;
+	ioat_chan_entry->current_queue_depth++;
 }
 
 static void
-submit_xfers(struct thread_entry *thread_entry, uint64_t queue_depth)
+submit_xfers(struct ioat_chan_entry *ioat_chan_entry, uint64_t queue_depth)
 {
 	while (queue_depth-- > 0) {
 		void *src = NULL, *dst = NULL;
 		struct ioat_task *ioat_task = NULL;
 
-		src = spdk_mempool_get(thread_entry->data_pool);
-		dst = spdk_mempool_get(thread_entry->data_pool);
-		ioat_task = spdk_mempool_get(thread_entry->task_pool);
+		src = spdk_mempool_get(ioat_chan_entry->data_pool);
+		dst = spdk_mempool_get(ioat_chan_entry->data_pool);
+		ioat_task = spdk_mempool_get(ioat_chan_entry->task_pool);
 
-		submit_single_xfer(thread_entry, ioat_task, dst, src);
+		submit_single_xfer(ioat_chan_entry, ioat_task, dst, src);
 	}
 }
 
 static int
 work_fn(void *arg)
 {
-	char buf_pool_name[20], task_pool_name[20];
 	uint64_t tsc_end;
-	struct thread_entry *t = (struct thread_entry *)arg;
+	struct worker_thread *worker = (struct worker_thread *)arg;
+	struct ioat_chan_entry *t = NULL;
 
-	if (!t->chan) {
-		return 0;
-	}
-
-	t->lcore_id = rte_lcore_id();
-
-	snprintf(buf_pool_name, sizeof(buf_pool_name), "buf_pool_%d", rte_lcore_id());
-	snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", rte_lcore_id());
-	t->data_pool = spdk_mempool_create(buf_pool_name, 512, g_user_config.xfer_size_bytes, -1);
-	t->task_pool = spdk_mempool_create(task_pool_name, 512, sizeof(struct ioat_task), -1);
-	if (!t->data_pool || !t->task_pool) {
-		fprintf(stderr, "Could not allocate buffer pool.\n");
-		return 1;
-	}
+	printf("Starting thread on core %u\n", worker->lcore);
 
 	tsc_end = spdk_get_ticks() + g_user_config.time_in_sec * spdk_get_ticks_hz();
 
-	// begin to submit transfers
-	submit_xfers(t, g_user_config.queue_depth);
-	while (spdk_get_ticks() < tsc_end) {
-		spdk_ioat_process_events(t->chan);
+	t = worker->ctx;
+	while (t != NULL) {
+		// begin to submit transfers
+		submit_xfers(t, g_user_config.queue_depth);
+		t = t->next;
 	}
 
-	// begin to drain io
-	t->is_draining = true;
-	drain_io(t);
+	while (1) {
+		t = worker->ctx;
+		while (t != NULL) {
+			spdk_ioat_process_events(t->chan);
+			t = t->next;
+		}
+
+		if (spdk_get_ticks() > tsc_end) {
+			break;
+		}
+	}
+
+	t = worker->ctx;
+	while (t != NULL) {
+		// begin to drain io
+		t->is_draining = true;
+		drain_io(t);
+		t = t->next;
+	}
 
 	return 0;
 }
@@ -328,47 +414,47 @@ init(void)
 
 	free(core_mask_conf);
 
-	if (ioat_init() != 0) {
-		fprintf(stderr, "Could not init ioat\n");
-		return 1;
-	}
-
 	return 0;
 }
 
 static int
-dump_result(struct thread_entry *threads, int len)
+dump_result(void)
 {
-	int i;
 	uint64_t total_completed = 0;
 	uint64_t total_failed = 0;
-	uint64_t total_xfer_per_sec, total_bw_in_MBps;
+	uint64_t total_xfer_per_sec, total_bw_in_MiBps;
+	struct worker_thread *worker = g_workers;
 
-	printf("lcore     Transfers        Bandwidth  Failed\n");
-	printf("--------------------------------------------\n");
-	for (i = 0; i < len; i++) {
-		struct thread_entry *t = &threads[i];
+	printf("Channel_ID     Lcore     Transfers     Bandwidth     Failed\n");
+	printf("-----------------------------------------------------------\n");
+	while (worker != NULL) {
+		struct ioat_chan_entry *t = worker->ctx;
+		while (t) {
+			uint64_t xfer_per_sec = t->xfer_completed / g_user_config.time_in_sec;
+			uint64_t bw_in_MiBps = (t->xfer_completed * g_user_config.xfer_size_bytes) /
+					       (g_user_config.time_in_sec * 1024 * 1024);
 
-		uint64_t xfer_per_sec = t->xfer_completed / g_user_config.time_in_sec;
-		uint64_t bw_in_MBps = (t->xfer_completed * g_user_config.xfer_size_bytes) /
-				      (g_user_config.time_in_sec * 1024 * 1024);
+			total_completed += t->xfer_completed;
+			total_failed += t->xfer_failed;
 
-		total_completed += t->xfer_completed;
-		total_failed += t->xfer_failed;
-
-		if (xfer_per_sec) {
-			printf("%5d  %10" PRIu64 "/s  %10" PRIu64 " MB/s  %6" PRIu64 "\n",
-			       t->lcore_id, xfer_per_sec, bw_in_MBps, t->xfer_failed);
+			if (xfer_per_sec) {
+				printf("%10d%10d%12" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 "\n",
+				       t->ioat_chan_id, worker->lcore, xfer_per_sec,
+				       bw_in_MiBps, t->xfer_failed);
+			}
+			t = t->next;
 		}
+		worker = worker->next;
 	}
 
 	total_xfer_per_sec = total_completed / g_user_config.time_in_sec;
-	total_bw_in_MBps = (total_completed * g_user_config.xfer_size_bytes) /
-			   (g_user_config.time_in_sec * 1024 * 1024);
+	total_bw_in_MiBps = (total_completed * g_user_config.xfer_size_bytes) /
+			    (g_user_config.time_in_sec * 1024 * 1024);
+
+	printf("===========================================================\n");
+	printf("Total:%26" PRIu64 "/s%8" PRIu64 " MiB/s%11" PRIu64 "\n",
+	       total_xfer_per_sec, total_bw_in_MiBps, total_failed);
 
-	printf("============================================\n");
-	printf("Total: %10" PRIu64 "/s  %10" PRIu64 " MB/s  %6" PRIu64 "\n",
-	       total_xfer_per_sec, total_bw_in_MBps, total_failed);
 	return total_failed ? 1 : 0;
 }
 
@@ -378,7 +464,6 @@ get_next_chan(void)
 	struct spdk_ioat_chan *chan;
 
 	if (g_next_device == NULL) {
-		fprintf(stderr, "Not enough ioat channels found. Check that ioatdma driver is unloaded.\n");
 		return NULL;
 	}
 
@@ -389,12 +474,55 @@ get_next_chan(void)
 	return chan;
 }
 
+static int
+associate_workers_with_chan(void)
+{
+	struct spdk_ioat_chan *chan = get_next_chan();
+	struct worker_thread	*worker = g_workers;
+	struct ioat_chan_entry	*t;
+	char buf_pool_name[20], task_pool_name[20];
+	int i = 0;
+
+	while (chan != NULL) {
+		t = calloc(1, sizeof(struct ioat_chan_entry));
+		if (!t) {
+			return -1;
+		}
+
+		t->ioat_chan_id = i;
+		snprintf(buf_pool_name, sizeof(buf_pool_name), "buf_pool_%d", i);
+		snprintf(task_pool_name, sizeof(task_pool_name), "task_pool_%d", i);
+		t->data_pool = spdk_mempool_create(buf_pool_name, 512, g_user_config.xfer_size_bytes, -1);
+		t->task_pool = spdk_mempool_create(task_pool_name, 512, sizeof(struct ioat_task), -1);
+		if (!t->data_pool || !t->task_pool) {
+			fprintf(stderr, "Could not allocate buffer pool.\n");
+			spdk_mempool_free(t->data_pool);
+			spdk_mempool_free(t->task_pool);
+			free(t);
+			return 1;
+		}
+		printf("Associating ioat_channel %d with lcore %d\n", i, worker->lcore);
+		t->chan = chan;
+		t->next = worker->ctx;
+		worker->ctx = t;
+
+		worker = worker->next;
+		if (worker == NULL) {
+			worker = g_workers;
+		}
+
+		chan = get_next_chan();
+		i++;
+	}
+
+	return 0;
+}
+
 int
 main(int argc, char **argv)
 {
-	unsigned lcore_id;
-	struct thread_entry threads[RTE_MAX_LCORE] = {};
 	int rc;
+	struct worker_thread *worker;
 
 	if (parse_args(argc, argv) != 0) {
 		return 1;
@@ -404,30 +532,56 @@ main(int argc, char **argv)
 		return 1;
 	}
 
-	dump_user_config(&g_user_config);
-
-	g_next_device = TAILQ_FIRST(&g_devices);
-	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
-		threads[lcore_id].chan = get_next_chan();
-		rte_eal_remote_launch(work_fn, &threads[lcore_id], lcore_id);
-	}
-
-	threads[rte_get_master_lcore()].chan = get_next_chan();
-	if (work_fn(&threads[rte_get_master_lcore()]) != 0) {
-		rc = 1;
+	if (register_workers() != 0) {
+		rc = -1;
 		goto cleanup;
 	}
 
-	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
-		if (rte_eal_wait_lcore(lcore_id) != 0) {
-			rc = 1;
-			goto cleanup;
-		}
+	if (ioat_init() != 0) {
+		rc = -1;
+		goto cleanup;
 	}
 
-	rc = dump_result(threads, RTE_MAX_LCORE);
+	if (g_user_config.ioat_chan_num > g_ioat_chan_num) {
+		printf("%d channels are requested, but only %d are found,"
+		       "so only test %d channels\n", g_user_config.ioat_chan_num,
+		       g_ioat_chan_num, g_ioat_chan_num);
+		g_user_config.ioat_chan_num = g_ioat_chan_num;
+	}
+
+	g_next_device = TAILQ_FIRST(&g_devices);
+	dump_user_config(&g_user_config);
+
+	if (associate_workers_with_chan() != 0) {
+		rc = -1;
+		goto cleanup;
+	}
+
+	/* Launch all of the slave workers */
+	worker = g_workers->next;
+	while (worker != NULL) {
+		rte_eal_remote_launch(work_fn, worker, worker->lcore);
+		worker = worker->next;
+	}
+
+	rc = work_fn(g_workers);
+	if (rc < 0) {
+		goto cleanup;
+	}
+
+	worker = g_workers->next;
+	while (worker != NULL) {
+		if (rte_eal_wait_lcore(worker->lcore) < 0) {
+			rc = -1;
+			goto cleanup;
+		}
+		worker = worker->next;
+	}
+
+	rc = dump_result();
 
 cleanup:
+	unregister_workers();
 	ioat_exit();
 
 	return rc;