/*
 * DMA Engine test module
 *
 * Copyright (C) 2007 Atmel Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/freezer.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/wait.h>

static unsigned int test_buf_size = 16384;
module_param(test_buf_size, uint, S_IRUGO);
MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");

static char test_channel[20];
module_param_string(channel, test_channel, sizeof(test_channel), S_IRUGO);
MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)");

static char test_device[20];
module_param_string(device, test_device, sizeof(test_device), S_IRUGO);
MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)");

static unsigned int threads_per_chan = 1;
module_param(threads_per_chan, uint, S_IRUGO);
MODULE_PARM_DESC(threads_per_chan,
		"Number of threads to start per channel (default: 1)");

static unsigned int max_channels;
module_param(max_channels, uint, S_IRUGO);
MODULE_PARM_DESC(max_channels,
		"Maximum number of channels to use (default: all)");

static unsigned int iterations;
module_param(iterations, uint, S_IRUGO);
MODULE_PARM_DESC(iterations,
		"Iterations before stopping test (default: infinite)");

static unsigned int xor_sources = 3;
module_param(xor_sources, uint, S_IRUGO);
MODULE_PARM_DESC(xor_sources,
		"Number of xor source buffers (default: 3)");

static unsigned int pq_sources = 3;
module_param(pq_sources, uint, S_IRUGO);
MODULE_PARM_DESC(pq_sources,
		"Number of p+q source buffers (default: 3)");

/*
 * Registered as "int" so that the parameter type matches the variable's
 * declared type and the documented "-1" value can be passed.
 */
static int timeout = 3000;
module_param(timeout, int, S_IRUGO);
MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
		 "Pass -1 for infinite timeout");

/*
 * Initialization patterns. All bytes in the source buffer have bit 7
 * set, all bytes in the destination buffer have bit 7 cleared.
 *
 * Bit 6 is set for all bytes which are to be copied by the DMA
 * engine. Bit 5 is set for all bytes which are to be overwritten by
 * the DMA engine.
 *
 * The remaining bits are the inverse of a counter which increments by
 * one for each byte address.
 */
#define PATTERN_SRC		0x80
#define PATTERN_DST		0x00
#define PATTERN_COPY		0x40
#define PATTERN_OVERWRITE	0x20
#define PATTERN_COUNT_MASK	0x1f

/*
 * Per-thread test state.  @srcs and @dsts are NULL-terminated arrays of
 * buffers which are allocated and freed by the thread itself.
 */
struct dmatest_thread {
	struct list_head	node;
	struct task_struct	*task;
	struct dma_chan		*chan;
	u8			**srcs;
	u8			**dsts;
	enum dma_transaction_type type;
};

/* One entry per channel under test, with the list of its test threads. */
struct dmatest_chan {
	struct list_head	node;
	struct dma_chan		*chan;
	struct list_head	threads;
};

/*
 * These are protected by dma_list_mutex since they're only used by
 * the DMA filter function callback
 */
static LIST_HEAD(dmatest_channels);
static unsigned int nr_channels;

/* True if no "channel" parameter was given or if it names @chan. */
static bool dmatest_match_channel(struct dma_chan *chan)
{
	if (test_channel[0] == '\0')
		return true;
	return strcmp(dma_chan_name(chan), test_channel) == 0;
}

/* True if no "device" parameter was given or if it names @device. */
static bool dmatest_match_device(struct dma_device *device)
{
	if (test_device[0] == '\0')
		return true;
	return strcmp(dev_name(device->dev), test_device) == 0;
}

/* Return an unsigned long filled by get_random_bytes(). */
static unsigned long dmatest_random(void)
{
	unsigned long buf;

	get_random_bytes(&buf, sizeof(buf));
	return buf;
}

/*
 * Fill every buffer of the NULL-terminated array @bufs with the source
 * pattern.  Bytes in [@start, @start + @len) additionally carry
 * PATTERN_COPY; all test_buf_size bytes of each buffer are written.
 */
static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len)
{
	unsigned int i;
	u8 *buf;

	for (; (buf = *bufs); bufs++) {
		for (i = 0; i < start; i++)
			buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
		for ( ; i < start + len; i++)
			buf[i] = PATTERN_SRC | PATTERN_COPY
				| (~i & PATTERN_COUNT_MASK);
		for ( ; i < test_buf_size; i++)
			buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
	}
}

/*
 * Fill every buffer of the NULL-terminated array @bufs with the
 * destination pattern.  Bytes in [@start, @start + @len) additionally
 * carry PATTERN_OVERWRITE; all test_buf_size bytes of each buffer are
 * written.
 */
static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len)
{
	unsigned int i;
	u8 *buf;

	for (; (buf = *bufs); bufs++) {
		for (i = 0; i < start; i++)
			buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
		for ( ; i < start + len; i++)
			buf[i] = PATTERN_DST | PATTERN_OVERWRITE
				| (~i & PATTERN_COUNT_MASK);
		for ( ; i < test_buf_size; i++)
			buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
	}
}

/*
 * Log one mismatching byte.  The message is chosen from the bits in
 * which @actual differs from @pattern and from whether the byte belongs
 * to a source buffer (@is_srcbuf).  @index is the byte offset reported
 * and @counter the counter value used to build the expected byte.
 */
static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
		unsigned int counter, bool is_srcbuf)
{
	u8		diff = actual ^ pattern;
	u8		expected = pattern | (~counter & PATTERN_COUNT_MASK);
	const char	*thread_name = current->comm;

	if (is_srcbuf)
		pr_warning("%s: srcbuf[0x%x] overwritten!"
				" Expected %02x, got %02x\n",
				thread_name, index, expected, actual);
	else if ((pattern & PATTERN_COPY)
			&& (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
		pr_warning("%s: dstbuf[0x%x] not copied!"
				" Expected %02x, got %02x\n",
				thread_name, index, expected, actual);
	else if (diff & PATTERN_SRC)
		pr_warning("%s: dstbuf[0x%x] was copied!"
				" Expected %02x, got %02x\n",
				thread_name, index, expected, actual);
	else
		pr_warning("%s: dstbuf[0x%x] mismatch!"
				" Expected %02x, got %02x\n",
				thread_name, index, expected, actual);
}

/*
 * Check bytes [@start, @end) of every buffer in the NULL-terminated
 * array @bufs against @pattern combined with a counter that starts at
 * @counter for each buffer.  The first 32 mismatches are reported
 * individually; the rest are summarized.
 *
 * Returns the total number of mismatching bytes.
 */
static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
		unsigned int end, unsigned int counter, u8 pattern,
		bool is_srcbuf)
{
	unsigned int i;
	unsigned int error_count = 0;
	u8 actual;
	u8 expected;
	u8 *buf;
	unsigned int counter_orig = counter;

	for (; (buf = *bufs); bufs++) {
		counter = counter_orig;
		for (i = start; i < end; i++) {
			actual = buf[i];
			expected = pattern | (~counter & PATTERN_COUNT_MASK);
			if (actual != expected) {
				if (error_count < 32)
					dmatest_mismatch(actual, pattern, i,
							 counter, is_srcbuf);
				error_count++;
			}
			counter++;
		}
	}

	if (error_count > 32)
		pr_warning("%s: %u errors suppressed\n",
			current->comm, error_count - 32);

	return error_count;
}

/* poor man's completion - we want to use wait_event_freezable() on it */
struct dmatest_done {
	bool			done;
	wait_queue_head_t	*wait;
};

/* Completion callback: mark the transfer done and wake all waiters. */
static void dmatest_callback(void *arg)
{
	struct dmatest_done *done = arg;

	done->done = true;
	wake_up_all(done->wait);
}

/* Unmap the first @count source mappings in @addr (DMA_TO_DEVICE). */
static inline void unmap_src(struct device *dev, dma_addr_t *addr, size_t len,
			     unsigned int count)
{
	while (count--)
		dma_unmap_single(dev, addr[count], len, DMA_TO_DEVICE);
}

/* Unmap the first @count destination mappings in @addr (DMA_BIDIRECTIONAL). */
static inline void unmap_dst(struct device *dev, dma_addr_t *addr, size_t len,
			     unsigned int count)
{
	while (count--)
		dma_unmap_single(dev, addr[count], len, DMA_BIDIRECTIONAL);
}

/*
 * This function repeatedly tests DMA transfers of various lengths and
 * offsets for a given operation type until it is told to exit by
 * kthread_stop(). There may be multiple threads running this function
 * in parallel for a single channel, and there may be multiple channels
 * being tested in parallel.
 *
 * Before each test, the source and destination buffer is initialized
 * with a known pattern. This pattern is different depending on
 * whether it's in an area which is supposed to be copied or
 * overwritten, and different in the source and destination buffers.
 * So if the DMA engine doesn't copy exactly what we tell it to copy,
 * we'll notice.
 *
 * @data is the struct dmatest_thread describing this thread.  Returns 0
 * once the test loop has been entered and left, or -ENOMEM if the
 * operation type is unsupported or a buffer allocation failed.
 */
static int dmatest_func(void *data)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
	struct dmatest_thread	*thread = data;
	struct dmatest_done	done = { .wait = &done_wait };
	struct dma_chan		*chan;
	const char		*thread_name;
	unsigned int		src_off, dst_off, len;
	unsigned int		error_count;
	unsigned int		failed_tests = 0;
	unsigned int		total_tests = 0;
	dma_cookie_t		cookie;
	enum dma_status		status;
	enum dma_ctrl_flags	flags;
	u8			pq_coefs[pq_sources + 1];
	int			ret;
	int			src_cnt;
	int			dst_cnt;
	int			i;

	thread_name = current->comm;
	set_freezable();

	ret = -ENOMEM;

	smp_rmb();
	chan = thread->chan;
	if (thread->type == DMA_MEMCPY)
		src_cnt = dst_cnt = 1;
	else if (thread->type == DMA_XOR) {
		src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
		dst_cnt = 1;
	} else if (thread->type == DMA_PQ) {
		src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
		dst_cnt = 2;
		for (i = 0; i < src_cnt; i++)
			pq_coefs[i] = 1;
	} else
		goto err_srcs;

	/* Both pointer arrays get one extra, zeroed slot as terminator. */
	thread->srcs = kcalloc(src_cnt+1, sizeof(u8 *), GFP_KERNEL);
	if (!thread->srcs)
		goto err_srcs;
	for (i = 0; i < src_cnt; i++) {
		thread->srcs[i] = kmalloc(test_buf_size, GFP_KERNEL);
		if (!thread->srcs[i])
			goto err_srcbuf;
	}
	thread->srcs[i] = NULL;

	thread->dsts = kcalloc(dst_cnt+1, sizeof(u8 *), GFP_KERNEL);
	if (!thread->dsts)
		goto err_dsts;
	for (i = 0; i < dst_cnt; i++) {
		thread->dsts[i] = kmalloc(test_buf_size, GFP_KERNEL);
		if (!thread->dsts[i])
			goto err_dstbuf;
	}
	thread->dsts[i] = NULL;

	set_user_nice(current, 10);

	/*
	 * src buffers are unmapped by the DMAEngine code with
	 * dma_unmap_single(), dst buffers are unmapped by ourselves below
	 */
	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT
	      | DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SRC_UNMAP_SINGLE;

	while (!kthread_should_stop()
	       && !(iterations && total_tests >= iterations)) {
		struct dma_device *dev = chan->device;
		struct dma_async_tx_descriptor *tx = NULL;
		dma_addr_t dma_srcs[src_cnt];
		dma_addr_t dma_dsts[dst_cnt];
		u8 align = 0;

		total_tests++;

		/* honor alignment restrictions */
		if (thread->type == DMA_MEMCPY)
			align = dev->copy_align;
		else if (thread->type == DMA_XOR)
			align = dev->xor_align;
		else if (thread->type == DMA_PQ)
			align = dev->pq_align;

		/* This also rejects test_buf_size == 0 before the modulo. */
		if (1 << align > test_buf_size) {
			pr_err("%u-byte buffer too small for %d-byte alignment\n",
			       test_buf_size, 1 << align);
			break;
		}

		len = dmatest_random() % test_buf_size + 1;
		len = (len >> align) << align;
		if (!len)
			len = 1 << align;
		src_off = dmatest_random() % (test_buf_size - len + 1);
		dst_off = dmatest_random() % (test_buf_size - len + 1);

		src_off = (src_off >> align) << align;
		dst_off = (dst_off >> align) << align;

		dmatest_init_srcs(thread->srcs, src_off, len);
		dmatest_init_dsts(thread->dsts, dst_off, len);

		for (i = 0; i < src_cnt; i++) {
			u8 *buf = thread->srcs[i] + src_off;

			dma_srcs[i] = dma_map_single(dev->dev, buf, len,
						     DMA_TO_DEVICE);
			ret = dma_mapping_error(dev->dev, dma_srcs[i]);
			if (ret) {
				unmap_src(dev->dev, dma_srcs, len, i);
				pr_warn("%s: #%u: mapping error %d with "
					"src_off=0x%x len=0x%x\n",
					thread_name, total_tests - 1, ret,
					src_off, len);
				failed_tests++;
				break;
			}
		}
		/*
		 * A mapping failed and everything mapped so far has been
		 * released: abandon this test iteration rather than carry
		 * on with invalid DMA addresses.
		 */
		if (i < src_cnt)
			continue;

		/* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
		for (i = 0; i < dst_cnt; i++) {
			dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i],
						     test_buf_size,
						     DMA_BIDIRECTIONAL);
			ret = dma_mapping_error(dev->dev, dma_dsts[i]);
			if (ret) {
				unmap_src(dev->dev, dma_srcs, len, src_cnt);
				unmap_dst(dev->dev, dma_dsts, test_buf_size,
					  i);
				pr_warn("%s: #%u: mapping error %d with "
					"dst_off=0x%x len=0x%x\n",
					thread_name, total_tests - 1, ret,
					dst_off, test_buf_size);
				failed_tests++;
				break;
			}
		}
		/* Same as above: give up on this iteration after a failure. */
		if (i < dst_cnt)
			continue;

		if (thread->type == DMA_MEMCPY)
			tx = dev->device_prep_dma_memcpy(chan,
							 dma_dsts[0] + dst_off,
							 dma_srcs[0], len,
							 flags);
		else if (thread->type == DMA_XOR)
			tx = dev->device_prep_dma_xor(chan,
						      dma_dsts[0] + dst_off,
						      dma_srcs, src_cnt,
						      len, flags);
		else if (thread->type == DMA_PQ) {
			dma_addr_t dma_pq[dst_cnt];

			for (i = 0; i < dst_cnt; i++)
				dma_pq[i] = dma_dsts[i] + dst_off;
			tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
						     src_cnt, pq_coefs,
						     len, flags);
		}

		if (!tx) {
			unmap_src(dev->dev, dma_srcs, len, src_cnt);
			unmap_dst(dev->dev, dma_dsts, test_buf_size, dst_cnt);
			pr_warning("%s: #%u: prep error with src_off=0x%x "
					"dst_off=0x%x len=0x%x\n",
					thread_name, total_tests - 1,
					src_off, dst_off, len);
			msleep(100);
			failed_tests++;
			continue;
		}

		done.done = false;
		tx->callback = dmatest_callback;
		tx->callback_param = &done;
		cookie = tx->tx_submit(tx);

		if (dma_submit_error(cookie)) {
			/*
			 * NOTE(review): the mappings created above are not
			 * released on this path - confirm whether the
			 * driver is expected to unmap on a failed submit.
			 */
			pr_warning("%s: #%u: submit error %d with src_off=0x%x "
					"dst_off=0x%x len=0x%x\n",
					thread_name, total_tests - 1, cookie,
					src_off, dst_off, len);
			msleep(100);
			failed_tests++;
			continue;
		}
		dma_async_issue_pending(chan);

		wait_event_freezable_timeout(done_wait, done.done,
					     msecs_to_jiffies(timeout));

		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);

		if (!done.done) {
			/*
			 * We're leaving the timed out dma operation with
			 * dangling pointer to done_wait.  To make this
			 * correct, we'll need to allocate wait_done for
			 * each test iteration and perform "who's gonna
			 * free it this time?" dancing.  For now, just
			 * leave it dangling.
			 */
			pr_warning("%s: #%u: test timed out\n",
				   thread_name, total_tests - 1);
			failed_tests++;
			continue;
		} else if (status != DMA_SUCCESS) {
			/*
			 * NOTE(review): the destination mappings are not
			 * released on this path - confirm whether that is
			 * intentional.
			 */
			pr_warning("%s: #%u: got completion callback,"
				   " but status is \'%s\'\n",
				   thread_name, total_tests - 1,
				   status == DMA_ERROR ? "error" : "in progress");
			failed_tests++;
			continue;
		}

		/* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */
		unmap_dst(dev->dev, dma_dsts, test_buf_size, dst_cnt);

		error_count = 0;

		pr_debug("%s: verifying source buffer...\n", thread_name);
		error_count += dmatest_verify(thread->srcs, 0, src_off,
				0, PATTERN_SRC, true);
		error_count += dmatest_verify(thread->srcs, src_off,
				src_off + len, src_off,
				PATTERN_SRC | PATTERN_COPY, true);
		error_count += dmatest_verify(thread->srcs, src_off + len,
				test_buf_size, src_off + len,
				PATTERN_SRC, true);

		/*
		 * Use thread_name rather than thread->task->comm: the
		 * creator only stores thread->task after kthread_run()
		 * returns, so it may not be set yet when we get here.
		 */
		pr_debug("%s: verifying dest buffer...\n", thread_name);
		error_count += dmatest_verify(thread->dsts, 0, dst_off,
				0, PATTERN_DST, false);
		error_count += dmatest_verify(thread->dsts, dst_off,
				dst_off + len, src_off,
				PATTERN_SRC | PATTERN_COPY, false);
		error_count += dmatest_verify(thread->dsts, dst_off + len,
				test_buf_size, dst_off + len,
				PATTERN_DST, false);

		if (error_count) {
			pr_warning("%s: #%u: %u errors with "
				"src_off=0x%x dst_off=0x%x len=0x%x\n",
				thread_name, total_tests - 1, error_count,
				src_off, dst_off, len);
			failed_tests++;
		} else {
			pr_debug("%s: #%u: No errors with "
				"src_off=0x%x dst_off=0x%x len=0x%x\n",
				thread_name, total_tests - 1,
				src_off, dst_off, len);
		}
	}

	ret = 0;

	/*
	 * The pointer arrays come from kcalloc() and are therefore always
	 * NULL-terminated, even when only some of the buffers could be
	 * allocated.  The error labels sit in front of the per-buffer free
	 * loops so that a partial allocation is released as well.
	 */
err_dstbuf:
	for (i = 0; thread->dsts[i]; i++)
		kfree(thread->dsts[i]);
	kfree(thread->dsts);
err_dsts:
err_srcbuf:
	for (i = 0; thread->srcs[i]; i++)
		kfree(thread->srcs[i]);
	kfree(thread->srcs);
err_srcs:
	pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
			thread_name, total_tests, failed_tests, ret);

	/* terminate all transfers on specified channels */
	chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);

	/* With a finite iteration count, idle until kthread_stop(). */
	if (iterations > 0)
		while (!kthread_should_stop()) {
			DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit);
			interruptible_sleep_on(&wait_dmatest_exit);
		}

	return ret;
}

/*
 * Stop and free every test thread of @dtc, terminate all transfers on
 * its channel and free @dtc itself.
 */
static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
{
	struct dmatest_thread	*thread;
	struct dmatest_thread	*_thread;
	int			ret;

	list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
		ret = kthread_stop(thread->task);
		/*
		 * NOTE(review): thread->task is dereferenced after
		 * kthread_stop() returned - confirm the task structure is
		 * still guaranteed to be valid at this point.
		 */
		pr_debug("dmatest: thread %s exited with status %d\n",
				thread->task->comm, ret);
		list_del(&thread->node);
		kfree(thread);
	}

	/* terminate all transfers on specified channels */
	dtc->chan->device->device_control(dtc->chan, DMA_TERMINATE_ALL, 0);

	kfree(dtc);
}

/*
 * Start up to threads_per_chan test threads of operation @type on the
 * channel of @dtc and link them into dtc->threads.
 *
 * Returns the number of threads started, or -EINVAL if @type is not
 * DMA_MEMCPY, DMA_XOR or DMA_PQ.
 */
static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_type type)
{
	struct dmatest_thread *thread;
	struct dma_chan *chan = dtc->chan;
	char *op;
	unsigned int i;

	if (type == DMA_MEMCPY)
		op = "copy";
	else if (type == DMA_XOR)
		op = "xor";
	else if (type == DMA_PQ)
		op = "pq";
	else
		return -EINVAL;

	for (i = 0; i < threads_per_chan; i++) {
		thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
		if (!thread) {
			pr_warning("dmatest: No memory for %s-%s%u\n",
				   dma_chan_name(chan), op, i);

			break;
		}
		thread->chan = dtc->chan;
		thread->type = type;
		/* presumably pairs with the smp_rmb() in dmatest_func() */
		smp_wmb();
		thread->task = kthread_run(dmatest_func, thread, "%s-%s%u",
				dma_chan_name(chan), op, i);
		if (IS_ERR(thread->task)) {
			pr_warning("dmatest: Failed to run thread %s-%s%u\n",
					dma_chan_name(chan), op, i);
			kfree(thread);
			break;
		}

		/* srcbuf and dstbuf are allocated by the thread itself */

		list_add_tail(&thread->node, &dtc->threads);
	}

	return i;
}

/*
 * Allocate the bookkeeping for @chan, start test threads for each of
 * DMA_MEMCPY, DMA_XOR and DMA_PQ that the device advertises in its
 * capability mask, and add the channel to dmatest_channels.
 *
 * Returns 0 on success or -ENOMEM if the bookkeeping allocation fails.
 */
static int dmatest_add_channel(struct dma_chan *chan)
{
	struct dmatest_chan	*dtc;
	struct dma_device	*dma_dev = chan->device;
	unsigned int		thread_count = 0;
	int cnt;

	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
	if (!dtc) {
		pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan));
		return -ENOMEM;
	}

	dtc->chan = chan;
	INIT_LIST_HEAD(&dtc->threads);

	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
		cnt = dmatest_add_threads(dtc, DMA_MEMCPY);
		thread_count += cnt > 0 ? cnt : 0;
	}
	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
		cnt = dmatest_add_threads(dtc, DMA_XOR);
		thread_count += cnt > 0 ? cnt : 0;
	}
	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
		cnt = dmatest_add_threads(dtc, DMA_PQ);
		thread_count += cnt > 0 ? cnt : 0;
	}

	pr_info("dmatest: Started %u threads using %s\n",
		thread_count, dma_chan_name(chan));

	list_add_tail(&dtc->node, &dmatest_channels);
	nr_channels++;

	return 0;
}

/*
 * dma_request_channel() filter: accept @chan only if it matches both
 * the "channel" and the "device" module parameters.  @param is unused.
 */
static bool filter(struct dma_chan *chan, void *param)
{
	if (!dmatest_match_channel(chan) || !dmatest_match_device(chan->device))
		return false;
	else
		return true;
}

/*
 * Request DMA_MEMCPY-capable channels that pass filter() one by one and
 * start testing each of them, until no more channels are available,
 * max_channels (if non-zero) is reached, or adding a channel fails.
 *
 * Returns 0, or the error from dmatest_add_channel().
 */
static int __init dmatest_init(void)
{
	dma_cap_mask_t mask;
	struct dma_chan *chan;
	int err = 0;

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);
	for (;;) {
		chan = dma_request_channel(mask, filter, NULL);
		if (chan) {
			err = dmatest_add_channel(chan);
			if (err) {
				dma_release_channel(chan);
				break; /* add_channel failed, punt */
			}
		} else
			break; /* no more channels available */
		if (max_channels && nr_channels >= max_channels)
			break; /* we have all we need */
	}

	return err;
}
/* when compiled-in wait for drivers to load first */
late_initcall(dmatest_init);

/* Stop all test threads and release every channel that was acquired. */
static void __exit dmatest_exit(void)
{
	struct dmatest_chan *dtc, *_dtc;
	struct dma_chan *chan;

	list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
		list_del(&dtc->node);
		chan = dtc->chan;
		dmatest_cleanup_channel(dtc);
		pr_debug("dmatest: dropped channel %s\n",
			 dma_chan_name(chan));
		dma_release_channel(chan);
	}
}
module_exit(dmatest_exit);

MODULE_AUTHOR("Haavard Skinnemoen (Atmel)");
MODULE_LICENSE("GPL v2");