nvme/perf: Add support for multiple cores per device.
Intelligently allocate cores and devices to handle the following cases: 1) Equal cores and devices 2) More cores than devices by using multiple cores per device 3) More devices than cores by using multiple devices from a single core Change-Id: I3703f5c523268539bd00d399fe104c474a8e8c99 Signed-off-by: Ben Walker <benjamin.walker@intel.com>
This commit is contained in:
parent
afed5ba9da
commit
70db0e1c08
@ -58,6 +58,7 @@
|
||||
struct ctrlr_entry {
|
||||
struct nvme_controller *ctrlr;
|
||||
struct ctrlr_entry *next;
|
||||
char name[1024];
|
||||
};
|
||||
|
||||
enum entry_type {
|
||||
@ -76,24 +77,32 @@ struct ns_entry {
|
||||
#if HAVE_LIBAIO
|
||||
struct {
|
||||
int fd;
|
||||
io_context_t ctx;
|
||||
struct io_event *events;
|
||||
} aio;
|
||||
#endif
|
||||
} u;
|
||||
|
||||
struct ns_entry *next;
|
||||
uint32_t io_size_blocks;
|
||||
int io_completed;
|
||||
int current_queue_depth;
|
||||
uint64_t size_in_ios;
|
||||
uint64_t offset_in_ios;
|
||||
bool is_draining;
|
||||
char name[1024];
|
||||
};
|
||||
|
||||
struct perf_task {
|
||||
struct ns_worker_ctx {
|
||||
struct ns_entry *entry;
|
||||
uint64_t io_completed;
|
||||
uint64_t current_queue_depth;
|
||||
uint64_t offset_in_ios;
|
||||
bool is_draining;
|
||||
|
||||
#if HAVE_LIBAIO
|
||||
struct io_event *events;
|
||||
io_context_t ctx;
|
||||
#endif
|
||||
struct ns_worker_ctx *next;
|
||||
};
|
||||
|
||||
struct perf_task {
|
||||
struct ns_worker_ctx *ns_ctx;
|
||||
void *buf;
|
||||
#if HAVE_LIBAIO
|
||||
struct iocb iocb;
|
||||
@ -101,7 +110,7 @@ struct perf_task {
|
||||
};
|
||||
|
||||
struct worker_thread {
|
||||
struct ns_entry *namespaces;
|
||||
struct ns_worker_ctx *ns_ctx;
|
||||
struct worker_thread *next;
|
||||
unsigned lcore;
|
||||
};
|
||||
@ -110,8 +119,10 @@ struct rte_mempool *request_mempool;
|
||||
static struct rte_mempool *task_pool;
|
||||
|
||||
static struct ctrlr_entry *g_controllers = NULL;
|
||||
static struct ns_entry *g_namespaces = NULL;
|
||||
static int g_num_namespaces = 0;
|
||||
static struct worker_thread *g_workers = NULL;
|
||||
static struct worker_thread *g_current_worker = NULL;
|
||||
static int g_num_workers = 0;
|
||||
|
||||
static uint64_t g_tsc_rate;
|
||||
|
||||
@ -131,33 +142,21 @@ task_complete(struct perf_task *task);
|
||||
static void
|
||||
register_ns(struct nvme_controller *ctrlr, struct pci_device *pci_dev, struct nvme_namespace *ns)
|
||||
{
|
||||
struct worker_thread *worker;
|
||||
struct ns_entry *entry = malloc(sizeof(struct ns_entry));
|
||||
const struct nvme_controller_data *cdata = nvme_ctrlr_get_data(ctrlr);
|
||||
|
||||
worker = g_current_worker;
|
||||
|
||||
entry->type = ENTRY_TYPE_NVME_NS;
|
||||
entry->u.nvme.ctrlr = ctrlr;
|
||||
entry->u.nvme.ns = ns;
|
||||
entry->next = worker->namespaces;
|
||||
entry->io_completed = 0;
|
||||
entry->current_queue_depth = 0;
|
||||
entry->offset_in_ios = 0;
|
||||
entry->size_in_ios = nvme_ns_get_size(ns) /
|
||||
g_io_size_bytes;
|
||||
entry->io_size_blocks = g_io_size_bytes / nvme_ns_get_sector_size(ns);
|
||||
entry->is_draining = false;
|
||||
|
||||
snprintf(entry->name, sizeof(cdata->mn), "%s", cdata->mn);
|
||||
printf("Assigning namespace %s to lcore %u\n", entry->name, worker->lcore);
|
||||
worker->namespaces = entry;
|
||||
snprintf(entry->name, 44, "%-20.20s (%-20.20s)", cdata->mn, cdata->sn);
|
||||
|
||||
if (worker->next == NULL) {
|
||||
g_current_worker = g_workers;
|
||||
} else {
|
||||
g_current_worker = worker->next;
|
||||
}
|
||||
g_num_namespaces++;
|
||||
entry->next = g_namespaces;
|
||||
g_namespaces = entry;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -181,7 +180,6 @@ register_ctrlr(struct nvme_controller *ctrlr, struct pci_device *pci_dev)
|
||||
static int
|
||||
register_aio_file(const char *path)
|
||||
{
|
||||
struct worker_thread *worker;
|
||||
struct ns_entry *entry;
|
||||
|
||||
int flags, fd;
|
||||
@ -216,36 +214,18 @@ register_aio_file(const char *path)
|
||||
return -1;
|
||||
}
|
||||
|
||||
worker = g_current_worker;
|
||||
|
||||
entry = malloc(sizeof(struct ns_entry));
|
||||
|
||||
entry->type = ENTRY_TYPE_AIO_FILE;
|
||||
entry->u.aio.fd = fd;
|
||||
entry->u.aio.ctx = 0;
|
||||
if (io_setup(g_queue_depth, &entry->u.aio.ctx) < 0) {
|
||||
perror("io_setup");
|
||||
return -1;
|
||||
}
|
||||
entry->u.aio.events = calloc(g_queue_depth, sizeof(struct io_event));
|
||||
entry->next = worker->namespaces;
|
||||
entry->io_completed = 0;
|
||||
entry->current_queue_depth = 0;
|
||||
entry->offset_in_ios = 0;
|
||||
entry->size_in_ios = size / g_io_size_bytes;
|
||||
entry->io_size_blocks = g_io_size_bytes / blklen;
|
||||
entry->is_draining = false;
|
||||
|
||||
snprintf(entry->name, sizeof(entry->name), "%s", path);
|
||||
|
||||
printf("Assigning AIO device %s to lcore %u\n", entry->name, worker->lcore);
|
||||
worker->namespaces = entry;
|
||||
|
||||
if (worker->next == NULL) {
|
||||
g_current_worker = g_workers;
|
||||
} else {
|
||||
g_current_worker = worker->next;
|
||||
}
|
||||
g_num_namespaces++;
|
||||
entry->next = g_namespaces;
|
||||
g_namespaces = entry;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -271,7 +251,7 @@ aio_submit(io_context_t aio_ctx, struct iocb *iocb, int fd, enum io_iocb_cmd cmd
|
||||
}
|
||||
|
||||
static void
|
||||
aio_check_io(struct ns_entry *entry)
|
||||
aio_check_io(struct ns_worker_ctx *ns_ctx)
|
||||
{
|
||||
int count, i;
|
||||
struct timespec timeout;
|
||||
@ -279,14 +259,14 @@ aio_check_io(struct ns_entry *entry)
|
||||
timeout.tv_sec = 0;
|
||||
timeout.tv_nsec = 0;
|
||||
|
||||
count = io_getevents(entry->u.aio.ctx, 1, g_queue_depth, entry->u.aio.events, &timeout);
|
||||
count = io_getevents(ns_ctx->ctx, 1, g_queue_depth, ns_ctx->events, &timeout);
|
||||
if (count < 0) {
|
||||
fprintf(stderr, "io_getevents error\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
task_complete(entry->u.aio.events[i].data);
|
||||
task_complete(ns_ctx->events[i].data);
|
||||
}
|
||||
}
|
||||
#endif /* HAVE_LIBAIO */
|
||||
@ -302,22 +282,23 @@ static void io_complete(void *ctx, const struct nvme_completion *completion);
|
||||
static __thread unsigned int seed = 0;
|
||||
|
||||
static void
|
||||
submit_single_io(struct ns_entry *entry)
|
||||
submit_single_io(struct ns_worker_ctx *ns_ctx)
|
||||
{
|
||||
struct perf_task *task = NULL;
|
||||
uint64_t offset_in_ios;
|
||||
int rc;
|
||||
struct ns_entry *entry = ns_ctx->entry;
|
||||
|
||||
rte_mempool_get(task_pool, (void **)&task);
|
||||
|
||||
task->entry = entry;
|
||||
task->ns_ctx = ns_ctx;
|
||||
|
||||
if (g_is_random) {
|
||||
offset_in_ios = rand_r(&seed) % entry->size_in_ios;
|
||||
} else {
|
||||
offset_in_ios = entry->offset_in_ios++;
|
||||
if (entry->offset_in_ios == entry->size_in_ios) {
|
||||
entry->offset_in_ios = 0;
|
||||
offset_in_ios = ns_ctx->offset_in_ios++;
|
||||
if (ns_ctx->offset_in_ios == entry->size_in_ios) {
|
||||
ns_ctx->offset_in_ios = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -325,7 +306,7 @@ submit_single_io(struct ns_entry *entry)
|
||||
(g_rw_percentage != 0 && ((rand_r(&seed) % 100) < g_rw_percentage))) {
|
||||
#if HAVE_LIBAIO
|
||||
if (entry->type == ENTRY_TYPE_AIO_FILE) {
|
||||
rc = aio_submit(entry->u.aio.ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PREAD, task->buf,
|
||||
rc = aio_submit(ns_ctx->ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PREAD, task->buf,
|
||||
g_io_size_bytes, offset_in_ios * g_io_size_bytes, task);
|
||||
} else
|
||||
#endif
|
||||
@ -336,7 +317,7 @@ submit_single_io(struct ns_entry *entry)
|
||||
} else {
|
||||
#if HAVE_LIBAIO
|
||||
if (entry->type == ENTRY_TYPE_AIO_FILE) {
|
||||
rc = aio_submit(entry->u.aio.ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PWRITE, task->buf,
|
||||
rc = aio_submit(ns_ctx->ctx, &task->iocb, entry->u.aio.fd, IO_CMD_PWRITE, task->buf,
|
||||
g_io_size_bytes, offset_in_ios * g_io_size_bytes, task);
|
||||
} else
|
||||
#endif
|
||||
@ -350,17 +331,17 @@ submit_single_io(struct ns_entry *entry)
|
||||
fprintf(stderr, "starting I/O failed\n");
|
||||
}
|
||||
|
||||
entry->current_queue_depth++;
|
||||
ns_ctx->current_queue_depth++;
|
||||
}
|
||||
|
||||
static void
|
||||
task_complete(struct perf_task *task)
|
||||
{
|
||||
struct ns_entry *entry;
|
||||
struct ns_worker_ctx *ns_ctx;
|
||||
|
||||
entry = task->entry;
|
||||
entry->current_queue_depth--;
|
||||
entry->io_completed++;
|
||||
ns_ctx = task->ns_ctx;
|
||||
ns_ctx->current_queue_depth--;
|
||||
ns_ctx->io_completed++;
|
||||
|
||||
rte_mempool_put(task_pool, task);
|
||||
|
||||
@ -370,8 +351,8 @@ task_complete(struct perf_task *task)
|
||||
* to complete. In this case, do not submit a new I/O to replace
|
||||
* the one just completed.
|
||||
*/
|
||||
if (!entry->is_draining) {
|
||||
submit_single_io(entry);
|
||||
if (!ns_ctx->is_draining) {
|
||||
submit_single_io(ns_ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@ -382,32 +363,32 @@ io_complete(void *ctx, const struct nvme_completion *completion)
|
||||
}
|
||||
|
||||
static void
|
||||
check_io(struct ns_entry *entry)
|
||||
check_io(struct ns_worker_ctx *ns_ctx)
|
||||
{
|
||||
#if HAVE_LIBAIO
|
||||
if (entry->type == ENTRY_TYPE_AIO_FILE) {
|
||||
aio_check_io(entry);
|
||||
if (ns_ctx->entry->type == ENTRY_TYPE_AIO_FILE) {
|
||||
aio_check_io(ns_ctx);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
nvme_ctrlr_process_io_completions(entry->u.nvme.ctrlr);
|
||||
nvme_ctrlr_process_io_completions(ns_ctx->entry->u.nvme.ctrlr);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
submit_io(struct ns_entry *entry, int queue_depth)
|
||||
submit_io(struct ns_worker_ctx *ns_ctx, int queue_depth)
|
||||
{
|
||||
while (queue_depth-- > 0) {
|
||||
submit_single_io(entry);
|
||||
submit_single_io(ns_ctx);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
drain_io(struct ns_entry *entry)
|
||||
drain_io(struct ns_worker_ctx *ns_ctx)
|
||||
{
|
||||
entry->is_draining = true;
|
||||
while (entry->current_queue_depth > 0) {
|
||||
check_io(entry);
|
||||
ns_ctx->is_draining = true;
|
||||
while (ns_ctx->current_queue_depth > 0) {
|
||||
check_io(ns_ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@ -416,18 +397,17 @@ work_fn(void *arg)
|
||||
{
|
||||
uint64_t tsc_end = rte_get_timer_cycles() + g_time_in_sec * g_tsc_rate;
|
||||
struct worker_thread *worker = (struct worker_thread *)arg;
|
||||
struct ns_entry *entry = NULL;
|
||||
struct ns_worker_ctx *ns_ctx = NULL;
|
||||
|
||||
printf("Starting thread on core %u\n", worker->lcore);
|
||||
|
||||
nvme_register_io_thread();
|
||||
|
||||
/* Submit initial I/O for each namespace. */
|
||||
entry = worker->namespaces;
|
||||
while (entry != NULL) {
|
||||
|
||||
submit_io(entry, g_queue_depth);
|
||||
entry = entry->next;
|
||||
ns_ctx = worker->ns_ctx;
|
||||
while (ns_ctx != NULL) {
|
||||
submit_io(ns_ctx, g_queue_depth);
|
||||
ns_ctx = ns_ctx->next;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
@ -436,10 +416,10 @@ work_fn(void *arg)
|
||||
* I/O will be submitted in the io_complete callback
|
||||
* to replace each I/O that is completed.
|
||||
*/
|
||||
entry = worker->namespaces;
|
||||
while (entry != NULL) {
|
||||
check_io(entry);
|
||||
entry = entry->next;
|
||||
ns_ctx = worker->ns_ctx;
|
||||
while (ns_ctx != NULL) {
|
||||
check_io(ns_ctx);
|
||||
ns_ctx = ns_ctx->next;
|
||||
}
|
||||
|
||||
if (rte_get_timer_cycles() > tsc_end) {
|
||||
@ -447,10 +427,10 @@ work_fn(void *arg)
|
||||
}
|
||||
}
|
||||
|
||||
entry = worker->namespaces;
|
||||
while (entry != NULL) {
|
||||
drain_io(entry);
|
||||
entry = entry->next;
|
||||
ns_ctx = worker->ns_ctx;
|
||||
while (ns_ctx != NULL) {
|
||||
drain_io(ns_ctx);
|
||||
ns_ctx = ns_ctx->next;
|
||||
}
|
||||
|
||||
nvme_unregister_io_thread();
|
||||
@ -480,30 +460,29 @@ print_stats(void)
|
||||
{
|
||||
float io_per_second, mb_per_second;
|
||||
float total_io_per_second, total_mb_per_second;
|
||||
struct worker_thread *worker;
|
||||
struct worker_thread *worker;
|
||||
struct ns_worker_ctx *ns_ctx;
|
||||
|
||||
total_io_per_second = 0;
|
||||
total_mb_per_second = 0;
|
||||
|
||||
worker = g_workers;
|
||||
while (worker != NULL) {
|
||||
struct ns_entry *entry = worker->namespaces;
|
||||
while (entry != NULL) {
|
||||
io_per_second = (float)entry->io_completed /
|
||||
g_time_in_sec;
|
||||
mb_per_second = io_per_second * g_io_size_bytes /
|
||||
(1024 * 1024);
|
||||
printf("%-.20s: %10.2f IO/s %10.2f MB/s on lcore %u\n",
|
||||
entry->name, io_per_second,
|
||||
mb_per_second, worker->lcore);
|
||||
while (worker) {
|
||||
ns_ctx = worker->ns_ctx;
|
||||
while (ns_ctx) {
|
||||
io_per_second = (float)ns_ctx->io_completed / g_time_in_sec;
|
||||
mb_per_second = io_per_second * g_io_size_bytes / (1024 * 1024);
|
||||
printf("%-43.43s from core %u: %10.2f IO/s %10.2f MB/s\n",
|
||||
ns_ctx->entry->name, worker->lcore,
|
||||
io_per_second, mb_per_second);
|
||||
total_io_per_second += io_per_second;
|
||||
total_mb_per_second += mb_per_second;
|
||||
entry = entry->next;
|
||||
ns_ctx = ns_ctx->next;
|
||||
}
|
||||
worker = worker->next;
|
||||
}
|
||||
printf("=====================================================\n");
|
||||
printf("%-20s: %10.2f IO/s %10.2f MB/s\n",
|
||||
printf("========================================================\n");
|
||||
printf("%-55s: %10.2f IO/s %10.2f MB/s\n",
|
||||
"Total", total_io_per_second, total_mb_per_second);
|
||||
}
|
||||
|
||||
@ -632,7 +611,8 @@ register_workers(void)
|
||||
memset(worker, 0, sizeof(struct worker_thread));
|
||||
worker->lcore = rte_get_master_lcore();
|
||||
|
||||
g_workers = g_current_worker = worker;
|
||||
g_workers = worker;
|
||||
g_num_workers = 1;
|
||||
|
||||
RTE_LCORE_FOREACH_SLAVE(lcore) {
|
||||
prev_worker = worker;
|
||||
@ -640,6 +620,7 @@ register_workers(void)
|
||||
memset(worker, 0, sizeof(struct worker_thread));
|
||||
worker->lcore = lcore;
|
||||
prev_worker->next = worker;
|
||||
g_num_workers++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -700,6 +681,7 @@ static void
|
||||
unregister_controllers(void)
|
||||
{
|
||||
struct ctrlr_entry *entry = g_controllers;
|
||||
|
||||
while (entry) {
|
||||
struct ctrlr_entry *next = entry->next;
|
||||
nvme_detach(entry->ctrlr);
|
||||
@ -725,6 +707,54 @@ register_aio_files(int argc, char **argv)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
associate_workers_with_ns(void)
|
||||
{
|
||||
struct ns_entry *entry = g_namespaces;
|
||||
struct worker_thread *worker = g_workers;
|
||||
struct ns_worker_ctx *ns_ctx;
|
||||
int i, count;
|
||||
|
||||
count = g_num_namespaces > g_num_workers ? g_num_namespaces : g_num_workers;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
ns_ctx = malloc(sizeof(struct ns_worker_ctx));
|
||||
if (!ns_ctx) {
|
||||
return -1;
|
||||
}
|
||||
memset(ns_ctx, 0, sizeof(*ns_ctx));
|
||||
#ifdef HAVE_LIBAIO
|
||||
ns_ctx->events = calloc(g_queue_depth, sizeof(struct io_event));
|
||||
if (!ns_ctx->events) {
|
||||
return -1;
|
||||
}
|
||||
ns_ctx->ctx = 0;
|
||||
if (io_setup(g_queue_depth, &ns_ctx->ctx) < 0) {
|
||||
perror("io_setup");
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
printf("Associating %s with lcore %d\n", entry->name, worker->lcore);
|
||||
ns_ctx->entry = entry;
|
||||
ns_ctx->next = worker->ns_ctx;
|
||||
worker->ns_ctx = ns_ctx;
|
||||
|
||||
worker = worker->next;
|
||||
if (worker == NULL) {
|
||||
worker = g_workers;
|
||||
}
|
||||
|
||||
entry = entry->next;
|
||||
if (entry == NULL) {
|
||||
entry = g_namespaces;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static char *ealargs[] = {
|
||||
"perf",
|
||||
"-c 0x1", /* This must be the second parameter. It is overwritten by index in main(). */
|
||||
@ -769,18 +799,28 @@ int main(int argc, char **argv)
|
||||
|
||||
g_tsc_rate = rte_get_timer_hz();
|
||||
|
||||
register_workers();
|
||||
if (register_workers() != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (register_aio_files(argc, argv) != 0) {
|
||||
return 1;
|
||||
}
|
||||
register_controllers();
|
||||
|
||||
if (register_controllers() != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (associate_workers_with_ns() != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Initialization complete. Launching workers.\n");
|
||||
|
||||
/* Launch all of the slave workers */
|
||||
worker = g_workers->next;
|
||||
while (worker != NULL) {
|
||||
if (worker->namespaces != NULL) {
|
||||
rte_eal_remote_launch(work_fn, worker, worker->lcore);
|
||||
}
|
||||
rte_eal_remote_launch(work_fn, worker, worker->lcore);
|
||||
worker = worker->next;
|
||||
}
|
||||
|
||||
@ -788,10 +828,8 @@ int main(int argc, char **argv)
|
||||
|
||||
worker = g_workers->next;
|
||||
while (worker != NULL) {
|
||||
if (worker->namespaces != NULL) {
|
||||
if (rte_eal_wait_lcore(worker->lcore) < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (rte_eal_wait_lcore(worker->lcore) < 0) {
|
||||
return -1;
|
||||
}
|
||||
worker = worker->next;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user