/*
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * BSD LICENSE
 *
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * PCIe DMA Perf Linux driver
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/delay.h>
#include <linux/ktime.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>

#define DRIVER_NAME		"dma_perf"
#define DRIVER_DESCRIPTION	"PCIe DMA Performance Measurement Tool"
#define DRIVER_LICENSE		"Dual BSD/GPL"
#define DRIVER_VERSION		"1.0"
#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"

#define MAX_THREADS		32
#define MAX_TEST_SIZE		(1024 * 1024)	/* 1M */
#define DMA_CHANNELS_PER_NODE	8

MODULE_LICENSE(DRIVER_LICENSE);
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_AUTHOR("Changpeng Liu <changpeng.liu@intel.com>");
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);

static struct dentry *perf_debugfs_dir;
static struct perf_ctx *g_perf;

static unsigned int seg_order = 12;	/* 4K */
static unsigned int queue_depth = 256;
static unsigned int run_order = 32;	/* 4G */

struct perf_mw {
	size_t buf_size;
	void *virt_addr;
};

struct perf_ctx;

struct pthr_ctx {
	struct dentry *debugfs_thr_dir;
	struct dentry *debugfs_copied;
	struct dentry *debugfs_elapsed_time;
	struct device *dev;
	int node;
	wait_queue_head_t wq;
	struct perf_mw mw;
	struct task_struct *thread;
	struct perf_ctx *perf;
	atomic_t dma_sync;
	struct dma_chan *dma_chan;
	int dma_up;
	int dma_down;
	int dma_prep_err;
	u64 copied;
	u64 elapsed_time;
};

struct perf_ctx {
	spinlock_t db_lock;
	struct dentry *debugfs_node_dir;
	struct dentry *debugfs_run;
	struct dentry *debugfs_threads;
	struct dentry *debugfs_queue_depth;
	struct dentry *debugfs_transfer_size_order;
	struct dentry *debugfs_total_size_order;
	struct dentry *debugfs_status;
	u8 numa_nodes;
	u8 perf_threads;
	bool run;
	struct pthr_ctx pthr_ctx[MAX_THREADS];
	atomic_t tsync;
};

static void perf_free_mw(struct pthr_ctx *pctx);
static int perf_set_mw(struct pthr_ctx *pctx, size_t size);

/* Completion callback: one in-flight descriptor has retired. */
static void perf_copy_callback(void *data)
{
	struct pthr_ctx *pctx = data;

	atomic_dec(&pctx->dma_sync);
	pctx->dma_down++;
	wake_up(&pctx->wq);
}

static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst, char *src,
			 size_t size)
{
	struct dma_async_tx_descriptor *txd;
	struct dma_chan *chan = pctx->dma_chan;
	struct dma_device *device;
	struct dmaengine_unmap_data *unmap;
	dma_cookie_t cookie;
	size_t src_off, dst_off;
	int retries = 0;

	if (!chan) {
		pr_err("DMA engine does not exist\n");
		return -EINVAL;
	}

	device = chan->device;
	src_off = (size_t)src & ~PAGE_MASK;
	dst_off = (size_t)dst & ~PAGE_MASK;

	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
		return -ENODEV;

	unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = size;
	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
				      src_off, size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[0]))
		goto err_get_unmap;

	unmap->to_cnt = 1;

	unmap->addr[1] = dma_map_page(device->dev, virt_to_page(dst),
				      dst_off, size, DMA_FROM_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[1]))
		goto err_get_unmap;
	unmap->from_cnt = 1;

dma_prep_retry:
	txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
					     unmap->addr[0], size,
					     DMA_PREP_INTERRUPT);
	if (!txd) {
		/* The channel is out of descriptors; back off and retry. */
		if (retries++ > 20) {
			pctx->dma_prep_err++;
			goto err_get_unmap;
		} else {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(50);
			goto dma_prep_retry;
		}
	}

	txd->callback = perf_copy_callback;
	txd->callback_param = pctx;
	dma_set_unmap(txd, unmap);

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto err_set_unmap;

	atomic_inc(&pctx->dma_sync);
	pctx->dma_up++;
	dma_async_issue_pending(chan);

	return size;

err_set_unmap:
	dmaengine_unmap_put(unmap);
err_get_unmap:
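	/*
	 * Reaching here via err_set_unmap puts @unmap twice.  That appears
	 * intentional: dma_set_unmap() above took its own reference on
	 * @unmap, so two puts are needed to drop both that reference and
	 * the one from dmaengine_get_unmap_data().
	 */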
	dmaengine_unmap_put(unmap);
	return 0;
}

static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
			  u64 buf_size, u64 win_size, u64 total)
{
	int chunks, total_chunks, i;
	int copied_chunks = 0;
	ssize_t result;
	char *tmp = dst;
	u64 perf, diff_us;
	ktime_t kstart, kstop, kdiff;

	chunks = win_size / buf_size;
	total_chunks = total / buf_size;
	pr_info("%s: chunks: %d total_chunks: %d\n",
		current->comm, chunks, total_chunks);

	kstart = ktime_get();

	for (i = 0; i < total_chunks; i++) {
		/* Throttle submissions to the configured queue depth. */
		wait_event_interruptible(pctx->wq,
					 atomic_read(&pctx->dma_sync) <
					 queue_depth);
		result = perf_copy(pctx, tmp, src, buf_size);
		/* perf_copy() returns a negative errno on hard failure. */
		if (result < 0)
			break;
		pctx->copied += result;
		copied_chunks++;
		if (copied_chunks == chunks) {
			/* Wrap back to the start of the destination window. */
			tmp = dst;
			copied_chunks = 0;
		} else
			tmp += buf_size;
	}

	pr_info("%s: All DMA descriptors submitted\n", current->comm);

	/* FIXME: need a timeout here eventually */
	while (atomic_read(&pctx->dma_sync) != 0)
		msleep(1);

	pr_info("%s: dma_up: %d dma_down: %d dma_prep_err: %d\n",
		current->comm, pctx->dma_up, pctx->dma_down,
		pctx->dma_prep_err);

	kstop = ktime_get();
	kdiff = ktime_sub(kstop, kstart);
	diff_us = ktime_to_us(kdiff);

	pr_info("%s: copied %Lu bytes\n", current->comm, pctx->copied);
	pr_info("%s: lasted %Lu usecs\n", current->comm, diff_us);

	perf = pctx->copied / diff_us;
	pr_info("%s: MBytes/s: %Lu\n", current->comm, perf);

	pctx->elapsed_time = diff_us;

	return 0;
}

static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
{
	/* Only accept channels that live on the requested NUMA node. */
	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
}

static int dma_perf_thread(void *data)
{
	struct pthr_ctx *pctx = data;
	struct perf_ctx *perf = pctx->perf;
	struct perf_mw *mw = &pctx->mw;
	char *dst;
	u64 win_size, buf_size, total;
	void *src = NULL;
	int rc, node;
	struct dma_chan *dma_chan = NULL;

	pr_info("kthread %s starting...\n", current->comm);

	node = pctx->node;

	if (!pctx->dma_chan) {
		dma_cap_mask_t dma_mask;

		dma_cap_zero(dma_mask);
		dma_cap_set(DMA_MEMCPY, dma_mask);
		dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
					       (void *)(unsigned long)node);
		if (!dma_chan) {
			pr_warn("%s: cannot acquire DMA channel, quitting\n",
				current->comm);
			return -ENODEV;
		}
		pctx->dma_chan = dma_chan;
		pctx->dev = dma_chan->device->dev;
	}

	src = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
	if (!src) {
		rc = -ENOMEM;
		goto err;
	}

	rc = perf_set_mw(pctx, MAX_TEST_SIZE);
	if (rc < 0) {
		pr_err("%s: set mw failed\n", current->comm);
		rc = -ENXIO;
		goto err;
	}

	win_size = mw->buf_size;
	buf_size = 1ULL << seg_order;
	total = 1ULL << run_order;

	if (buf_size > MAX_TEST_SIZE)
		buf_size = MAX_TEST_SIZE;

	dst = (char *)mw->virt_addr;

	/* Wait until every worker thread is ready before starting. */
	atomic_inc(&perf->tsync);
	while (atomic_read(&perf->tsync) != perf->perf_threads)
		schedule();

	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);

	atomic_dec(&perf->tsync);

	if (rc < 0) {
		pr_err("%s: failed\n", current->comm);
		rc = -ENXIO;
		goto err;
	}

	kfree(src);
	return 0;

err:
	kfree(src);
	if (dma_chan) {
		dma_release_channel(dma_chan);
		pctx->dma_chan = NULL;
	}
	return rc;
}

static void perf_free_mw(struct pthr_ctx *pctx)
{
	struct perf_mw *mw = &pctx->mw;

	if (!mw->virt_addr)
		return;

	kfree(mw->virt_addr);
	mw->buf_size = 0;
	mw->virt_addr = NULL;
}

static int perf_set_mw(struct pthr_ctx *pctx, size_t size)
{
	struct perf_mw *mw = &pctx->mw;

	if (!size)
		return -EINVAL;

	/* Free any window left over from a previous run. */
	perf_free_mw(pctx);

	mw->buf_size = size;

	/* Allocate the destination window on the thread's NUMA node. */
	mw->virt_addr = kmalloc_node(size, GFP_KERNEL, pctx->node);
	if (!mw->virt_addr) {
		mw->buf_size = 0;
		return -EINVAL;
	}

	return 0;
}
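/*
 * debugfs control surface.  Any write to "run" toggles the test: it launches
 * the worker threads when idle and stops them when running.  "threads",
 * "queue_depth", "transfer_size_order" and "total_size_order" tune the next
 * run; per-thread results are exposed under thread_<n>/.
 */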
static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
				size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_offset;

	if (!perf)
		return 0;

	buf = kmalloc(64, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	out_offset = snprintf(buf, 64, "%d\n", perf->run);
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
	kfree(buf);

	return ret;
}

static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	int node, i;

	if (perf->perf_threads == 0)
		return 0;

	if (atomic_read(&perf->tsync) == 0)
		perf->run = false;

	if (perf->run) {
		/* let's stop the threads */
		perf->run = false;
		for (i = 0; i < MAX_THREADS; i++) {
			if (perf->pthr_ctx[i].thread) {
				kthread_stop(perf->pthr_ctx[i].thread);
				perf->pthr_ctx[i].thread = NULL;
			} else
				break;
		}
	} else {
		perf->run = true;

		if (perf->perf_threads > MAX_THREADS) {
			perf->perf_threads = MAX_THREADS;
			pr_info("Reset total threads to: %u\n", MAX_THREADS);
		}

		/* no greater than 1M */
		if (seg_order > 20) {
			seg_order = 20;
			pr_info("Fix seg_order to %u\n", seg_order);
		}

		if (run_order < seg_order) {
			run_order = seg_order;
			pr_info("Fix run_order to %u\n", run_order);
		}

		/* launch kernel threads */
		for (i = 0; i < perf->perf_threads; i++) {
			struct pthr_ctx *pctx;

			pctx = &perf->pthr_ctx[i];
			atomic_set(&pctx->dma_sync, 0);
			pctx->perf = perf;
			pctx->elapsed_time = 0;
			pctx->copied = 0;
			init_waitqueue_head(&pctx->wq);

			/* NUMA socket node */
			pctx->node = i / DMA_CHANNELS_PER_NODE;
			node = pctx->node;
			pctx->thread =
				kthread_create_on_node(dma_perf_thread,
						       (void *)pctx, node,
						       "dma_perf %d", i);
			if (!IS_ERR(pctx->thread)) {
				wake_up_process(pctx->thread);
			} else {
				/*
				 * kthread_create_on_node() returns ERR_PTR()
				 * on failure, never NULL: clear the stale
				 * pointer and stop only the threads already
				 * launched.
				 */
				int j;

				pctx->thread = NULL;
				perf->run = false;
				for (j = 0; j < i; j++) {
					if (perf->pthr_ctx[j].thread) {
						kthread_stop(perf->pthr_ctx[j].thread);
						perf->pthr_ctx[j].thread = NULL;
					}
				}
			}

			if (perf->run == false)
				return -ENXIO;
		}
	}

	return count;
}

static const struct file_operations dma_perf_debugfs_run = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.read = debugfs_run_read,
	.write = debugfs_run_write,
};

static ssize_t debugfs_status_read(struct file *filp, char __user *ubuf,
				   size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_offset;

	if (!perf)
		return 0;

	buf = kmalloc(64, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	out_offset = snprintf(buf, 64, "%s\n",
			      atomic_read(&perf->tsync) ?
"running" : "idle"); ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); kfree(buf); return ret; } static const struct file_operations dma_perf_debugfs_status = { .owner = THIS_MODULE, .open = simple_open, .read = debugfs_status_read, }; static int perf_debugfs_setup(struct perf_ctx *perf) { int i; char temp_name[64]; if (!perf_debugfs_dir) return -ENODEV; perf->debugfs_node_dir = debugfs_create_dir("dmaperf", perf_debugfs_dir); if (!perf->debugfs_node_dir) return -ENODEV; perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR, perf->debugfs_node_dir, perf, &dma_perf_debugfs_run); if (!perf->debugfs_run) return -ENODEV; perf->debugfs_status = debugfs_create_file("status", S_IRUSR, perf->debugfs_node_dir, perf, &dma_perf_debugfs_status); if (!perf->debugfs_status) return -ENODEV; perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR, perf->debugfs_node_dir, &perf->perf_threads); if (!perf->debugfs_threads) return -ENODEV; perf->debugfs_queue_depth = debugfs_create_u32("queue_depth", S_IRUSR | S_IWUSR, perf->debugfs_node_dir, &queue_depth); if (!perf->debugfs_queue_depth) return -ENODEV; perf->debugfs_transfer_size_order = debugfs_create_u32("transfer_size_order", S_IRUSR | S_IWUSR, perf->debugfs_node_dir, &seg_order); if (!perf->debugfs_transfer_size_order) return -ENODEV; perf->debugfs_total_size_order = debugfs_create_u32("total_size_order", S_IRUSR | S_IWUSR, perf->debugfs_node_dir, &run_order); if (!perf->debugfs_total_size_order) return -ENODEV; for (i = 0; i < MAX_THREADS; i++) { struct pthr_ctx *pctx = &perf->pthr_ctx[i]; sprintf(temp_name, "thread_%d", i); pctx->debugfs_thr_dir = debugfs_create_dir(temp_name, perf->debugfs_node_dir); if (!pctx->debugfs_thr_dir) return -ENODEV; pctx->debugfs_copied = debugfs_create_u64("copied", S_IRUSR, pctx->debugfs_thr_dir, &pctx->copied); if (!pctx->debugfs_copied) return -ENODEV; pctx->debugfs_elapsed_time = debugfs_create_u64("elapsed_time", S_IRUSR, pctx->debugfs_thr_dir, &pctx->elapsed_time); if (!pctx->debugfs_elapsed_time) return -ENODEV; } return 0; } static int perf_probe(void) { struct perf_ctx *perf; int rc = 0; perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, 0); if (!perf) { rc = -ENOMEM; goto err_perf; } perf->numa_nodes = num_online_nodes(); perf->perf_threads = 1; atomic_set(&perf->tsync, 0); perf->run = false; spin_lock_init(&perf->db_lock); if (debugfs_initialized() && !perf_debugfs_dir) { perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); if (!perf_debugfs_dir) goto err_ctx; rc = perf_debugfs_setup(perf); if (rc) goto err_ctx; } g_perf = perf; return 0; err_ctx: kfree(perf); err_perf: return rc; } static void perf_remove(void) { int i; struct perf_ctx *perf = g_perf; if (perf_debugfs_dir) { debugfs_remove_recursive(perf_debugfs_dir); perf_debugfs_dir = NULL; } for (i = 0; i < MAX_THREADS; i++) { struct pthr_ctx *pctx = &perf->pthr_ctx[i]; if (pctx->dma_chan) dma_release_channel(pctx->dma_chan); perf_free_mw(pctx); } kfree(perf); } static int __init perf_init_module(void) { printk("DMA Performance Test Init\n"); return perf_probe(); } module_init(perf_init_module); static void __exit perf_exit_module(void) { printk("DMA Performance Test Exit\n"); perf_remove(); } module_exit(perf_exit_module);