diff options
author | Daisuke Nojiri <dnojiri@google.com> | 2014-02-11 17:06:50 -0800 |
---|---|---|
committer | chrome-internal-fetch <chrome-internal-fetch@google.com> | 2014-02-13 20:37:05 +0000 |
commit | e7e0cf2cae2794437c4c71884b54c96cb6caf9b5 (patch) | |
tree | 3a0d51c3342587e90b7bfa6cf9d7e11cb42dd801 | |
parent | 7aa3258ae71f114e2ec328851bd201b2c9799ad4 (diff) | |
download | chrome-ec-e7e0cf2cae2794437c4c71884b54c96cb6caf9b5.tar.gz |
Optimize memmove
This speeds up memmove by copying a word at a time.
Ran the unit test on Peppy:
> runtest
...
Running test_memmove... (speed gain: 2156 -> 592 us) OK
...
Ran make buildall:
...
Running test_memmove... (speed gain: 143918 -> 32367 us) OK
...
TEST=Described above.
BUG=chrome-os-partner:23720
BRANCH=none
Signed-off-by: Daisuke Nojiri <dnojiri@chromium.org>
Tested-by: Daisuke Nojiri <dnojiri@google.com>
Change-Id: I6a3ac6aed27a404c3bef227b6c886a59414b51d7
Reviewed-on: https://chromium-review.googlesource.com/186020
Reviewed-by: Vic Yang <victoryang@chromium.org>
Reviewed-by: Randall Spangler <rspangler@chromium.org>
-rw-r--r-- | common/util.c | 44 | ||||
-rw-r--r-- | test/utils.c | 43 |
2 files changed, 71 insertions, 16 deletions
diff --git a/common/util.c b/common/util.c index 628aef9e82..5158d8aa06 100644 --- a/common/util.c +++ b/common/util.c @@ -238,17 +238,45 @@ void *memmove(void *dest, const void *src, int len) * memcpy(). */ return memcpy(dest, src, len); } else { - /* Copy from end, so we don't overwrite the source */ + /* Need to copy from tail because there is overlap. */ char *d = (char *)dest + len; const char *s = (const char *)src + len; - /* - * TODO(crosbug.com/p/23720): if src/dest are aligned, copy a - * word at a time instead. - */ - while (len > 0) { - *(--d) = *(--s); - len--; + uint32_t *dw; + const uint32_t *sw; + char *head; + char * const tail = (char *)dest; + /* Set 'body' to the last word boundary */ + uint32_t * const body = (uint32_t *)(((uintptr_t)tail+3) & ~3); + + if (((uintptr_t)dest & 3) != ((uintptr_t)src & 3)) { + /* Misaligned. no body, no tail. */ + head = tail; + } else { + /* Aligned */ + if ((uintptr_t)tail > ((uintptr_t)d & ~3)) + /* Shorter than the first word boundary */ + head = tail; + else + /* Set 'head' to the first word boundary */ + head = (char *)((uintptr_t)d & ~3); } + + /* Copy head */ + while (d > head) + *(--d) = *(--s); + + /* Copy body */ + dw = (uint32_t *)d; + sw = (uint32_t *)s; + while (dw > body) + *(--dw) = *(--sw); + + /* Copy tail */ + d = (char *)dw; + s = (const char *)sw; + while (d > tail) + *(--d) = *(--s); + return dest; } } diff --git a/test/utils.c b/test/utils.c index 1e58184977..9e1138074f 100644 --- a/test/utils.c +++ b/test/utils.c @@ -68,18 +68,45 @@ static int test_parse_bool(void) static int test_memmove(void) { - char buf[100]; int i; + timestamp_t t0, t1, t2, t3; + char *buf; + const int buf_size = 1000; + const int len = 400; + const int iteration = 1000; + + TEST_ASSERT(shared_mem_acquire(buf_size, &buf) == EC_SUCCESS); - for (i = 0; i < 100; ++i) + for (i = 0; i < len; ++i) + buf[i] = i & 0x7f; + for (i = len; i < buf_size; ++i) buf[i] = 0; - for (i = 0; i < 30; ++i) - buf[i] = i; - memmove(buf + 60, buf, 30); - TEST_ASSERT_ARRAY_EQ(buf, buf + 60, 30); - memmove(buf + 10, buf, 30); - TEST_ASSERT_ARRAY_EQ(buf + 10, buf + 60, 30); + t0 = get_time(); + for (i = 0; i < iteration; ++i) + memmove(buf + 101, buf, len); /* unaligned */ + t1 = get_time(); + TEST_ASSERT_ARRAY_EQ(buf + 101, buf, len); + ccprintf(" (speed gain: %d ->", t1.val-t0.val); + + t2 = get_time(); + for (i = 0; i < iteration; ++i) + memmove(buf + 100, buf, len); /* aligned */ + t3 = get_time(); + ccprintf(" %d us) ", t3.val-t2.val); + TEST_ASSERT_ARRAY_EQ(buf + 100, buf, len); + + /* Expected about 4x speed gain. Use 3x because it fluctuates */ + TEST_ASSERT((t1.val-t0.val) > (t3.val-t2.val) * 3); + + /* Test small moves */ + memmove(buf + 1, buf, 1); + TEST_ASSERT_ARRAY_EQ(buf + 1, buf, 1); + memmove(buf + 5, buf, 4); + memmove(buf + 1, buf, 4); + TEST_ASSERT_ARRAY_EQ(buf + 1, buf + 5, 4); + + shared_mem_release(buf); return EC_SUCCESS; } |