summary | refs | log | tree | commit | diff
path: root/common
diff options
context:
space:
mode:
author: Daisuke Nojiri <dnojiri@google.com> 2014-02-08 17:03:15 -0800
committer: chrome-internal-fetch <chrome-internal-fetch@google.com> 2014-02-12 19:40:52 +0000
commit: d3facbd92fe4e3f9815a9c4896bf2d5b31e51899 (patch)
tree: 30211686ce73766e052cc6dfac9a167eec441039 /common
parent: a78c59e4acbbd5b85c221b477b2db43f5e5d679b (diff)
download: chrome-ec-d3facbd92fe4e3f9815a9c4896bf2d5b31e51899.tar.gz
Optimize memcpy
This speeds up memcpy by copying a word at a time if the source and destination addresses are congruent modulo 4 (i.e. they have the same offset within a 32-bit word). That is, if n and m are positive integers: 4n -> 4m: aligned, 4x speed. 4n -> 4m+1: misaligned. 4n+1 -> 4m+1: aligned in mod 4, 4x speed. Ran the unit test on Peppy: > runtest ... Running test_memcpy... (speed gain: 120300 -> 38103 us) OK ... Ran make buildall -j: ... Running test_memcpy... (speed gain: 2084 -> 549 us) OK ... Note that the misaligned case is also optimized. The unit test runs in 298 us on Peppy, while it takes about 475 us with the original memcpy. TEST=Described above. BUG=chrome-os-partner:23720 BRANCH=none Signed-off-by: Daisuke Nojiri <dnojiri@chromium.org> Change-Id: Ic12260451c5efd0896d6353017cd45d29cb672db Tested-by: Daisuke Nojiri <dnojiri@google.com> Reviewed-on: https://chromium-review.googlesource.com/185618 Reviewed-by: Randall Spangler <rspangler@chromium.org> Reviewed-by: Vincent Palatin <vpalatin@chromium.org> Commit-Queue: Daisuke Nojiri <dnojiri@google.com>
Diffstat (limited to 'common')
-rw-r--r-- common/util.c 42
1 files changed, 35 insertions, 7 deletions
diff --git a/common/util.c b/common/util.c
index 32439edaaa..628aef9e82 100644
--- a/common/util.c
+++ b/common/util.c
@@ -173,16 +173,44 @@ int memcmp(const void *s1, const void *s2, int len)
void *memcpy(void *dest, const void *src, int len)
{
- /*
- * TODO(crosbug.com/p/23720): if src/dest are aligned, copy a word at a
- * time instead.
- */
char *d = (char *)dest;
const char *s = (const char *)src;
- while (len > 0) {
- *(d++) = *(s++);
- len--;
+ uint32_t *dw;
+ const uint32_t *sw;
+ char *head;
+ char * const tail = (char *)dest + len;
+ /* Set 'body' to the last word boundary */
+ uint32_t * const body = (uint32_t *)((uintptr_t)tail & ~3);
+
+ if (((uintptr_t)dest & 3) != ((uintptr_t)src & 3)) {
+ /* Misaligned. no body, no tail. */
+ head = tail;
+ } else {
+ /* Aligned */
+ if ((uintptr_t)tail < (((uintptr_t)d + 3) & ~3))
+ /* len is shorter than the first word boundary */
+ head = tail;
+ else
+ /* Set 'head' to the first word boundary */
+ head = (char *)(((uintptr_t)d + 3) & ~3);
}
+
+ /* Copy head */
+ while (d < head)
+ *(d++) = *(s++);
+
+ /* Copy body */
+ dw = (uint32_t *)d;
+ sw = (uint32_t *)s;
+ while (dw < body)
+ *(dw++) = *(sw++);
+
+ /* Copy tail */
+ d = (char *)dw;
+ s = (const char *)sw;
+ while (d < tail)
+ *(d++) = *(s++);
+
return dest;
}