summary refs log tree commit diff
diff options
context:
space:
mode:
author Daisuke Nojiri <dnojiri@chromium.org> 2014-02-28 16:03:04 -0800
committer chrome-internal-fetch <chrome-internal-fetch@google.com> 2014-03-01 20:15:59 +0000
commit e381585619778c4952ada5f3118ec4fcf304eb66 (patch)
tree 412fadab6f9d52a25fe328bc5f6e34cddbf88d04
parent a35bfd69e9a749fbaa152937dd91a6ed407a9005 (diff)
download chrome-ec-e381585619778c4952ada5f3118ec4fcf304eb66.tar.gz
Optimize memset
This speeds up memset by writing a word at a time instead of a byte at a time.

Ran the unit test on Peppy:
> runtest
...
Running test_memset... (speed gain: 141532 -> 32136 us) OK
...

Ran make buildall:
...
Running test_memset... (speed gain: 1338 -> 280 us) OK
...

TEST=Described above.
BUG=chrome-os-partner:23720
BRANCH=none
Signed-off-by: Daisuke Nojiri <dnojiri@chromium.org>

Change-Id: If34b06ad70f448d950535a4bea4f6556627a9b6f
Tested-by: Daisuke Nojiri <dnojiri@google.com>
Reviewed-on: https://chromium-review.googlesource.com/185936
Reviewed-by: Randall Spangler <rspangler@chromium.org>
Commit-Queue: Daisuke Nojiri <dnojiri@google.com>
-rw-r--r-- common/util.c 37
-rw-r--r-- include/test_util.h 13
-rw-r--r-- test/utils.c 59
3 files changed, 98 insertions, 11 deletions
diff --git a/common/util.c b/common/util.c
index 5158d8aa06..f8a0519f17 100644
--- a/common/util.c
+++ b/common/util.c
@@ -217,15 +217,38 @@ void *memcpy(void *dest, const void *src, int len)
void *memset(void *dest, int c, int len)
{
- /*
- * TODO(crosbug.com/p/23720): if dest is aligned, copy a word at a time
- * instead.
- */
char *d = (char *)dest;
- while (len > 0) {
+ uint32_t cccc;
+ uint32_t *dw;
+ char *head;
+ char * const tail = (char *)dest + len;
+ /* Set 'body' to the last word boundary */
+ uint32_t * const body = (uint32_t *)((uintptr_t)tail & ~3);
+
+ c &= 0xff; /* Clear upper bits before ORing below */
+ cccc = c | (c << 8) | (c << 16) | (c << 24);
+
+ if ((uintptr_t)tail < (((uintptr_t)d + 3) & ~3))
+ /* len is shorter than the first word boundary */
+ head = tail;
+ else
+ /* Set 'head' to the first word boundary */
+ head = (char *)(((uintptr_t)d + 3) & ~3);
+
+ /* Copy head */
+ while (d < head)
*(d++) = c;
- len--;
- }
+
+ /* Copy body */
+ dw = (uint32_t *)d;
+ while (dw < body)
+ *(dw++) = cccc;
+
+ /* Copy tail */
+ d = (char *)dw;
+ while (d < tail)
+ *(d++) = c;
+
return dest;
}
diff --git a/include/test_util.h b/include/test_util.h
index 4f76a200c4..f8f50fcc76 100644
--- a/include/test_util.h
+++ b/include/test_util.h
@@ -58,6 +58,19 @@
} \
} while (0)
+#define TEST_ASSERT_MEMSET(d, c, n) \
+ do { \
+ int __i; \
+ for (__i = 0; __i < n; ++__i) \
+ if ((d)[__i] != (c)) { \
+ ccprintf("%d: ASSERT_MEMSET failed at " \
+ "index=%d: %d != %d\n", __LINE__, \
+ __i, (int)(d)[__i], (c)); \
+ task_dump_trace(); \
+ return EC_ERROR_UNKNOWN; \
+ } \
+ } while (0)
+
#define TEST_CHECK(n) \
do { \
if (n) \
diff --git a/test/utils.c b/test/utils.c
index 9e1138074f..5d4a88d0fe 100644
--- a/test/utils.c
+++ b/test/utils.c
@@ -161,6 +161,58 @@ static int test_memcpy(void)
return EC_SUCCESS;
}
+/* Plain memset, used as a reference to measure speed gain */
+static void *dumb_memset(void *dest, int c, int len)
+{
+ char *d = (char *)dest;
+ while (len > 0) {
+ *(d++) = c;
+ len--;
+ }
+ return dest;
+}
+
+static int test_memset(void)
+{
+ int i;
+ timestamp_t t0, t1, t2, t3;
+ char *buf;
+ const int buf_size = 1000;
+ const int len = 400;
+ const int iteration = 1000;
+
+ TEST_ASSERT(shared_mem_acquire(buf_size, &buf) == EC_SUCCESS);
+
+ t0 = get_time();
+ for (i = 0; i < iteration; ++i)
+ dumb_memset(buf, 1, len);
+ t1 = get_time();
+ TEST_ASSERT_MEMSET(buf, (char)1, len);
+ ccprintf(" (speed gain: %d ->", t1.val-t0.val);
+
+ t2 = get_time();
+ for (i = 0; i < iteration; ++i)
+ memset(buf, 1, len);
+ t3 = get_time();
+ TEST_ASSERT_MEMSET(buf, (char)1, len);
+ ccprintf(" %d us) ", t3.val-t2.val);
+
+ /* Expected about 4x speed gain. Use 3x because it fluctuates */
+ TEST_ASSERT((t1.val-t0.val) > (t3.val-t2.val) * 3);
+
+ memset(buf, 128, len);
+ TEST_ASSERT_MEMSET(buf, (char)128, len);
+
+ memset(buf, -2, len);
+ TEST_ASSERT_MEMSET(buf, (char)-2, len);
+
+ memset(buf + 1, 1, len - 2);
+ TEST_ASSERT_MEMSET(buf + 1, (char)1, len - 2);
+
+ shared_mem_release(buf);
+ return EC_SUCCESS;
+}
+
static int test_strzcpy(void)
{
char dest[10];
@@ -248,7 +300,7 @@ static int test_get_next_bit(void)
static int test_shared_mem(void)
{
- int i, j;
+ int i;
int sz = shared_mem_size();
char *mem;
@@ -257,9 +309,7 @@ static int test_shared_mem(void)
for (i = 0; i < 256; ++i) {
memset(mem, i, sz);
- for (j = 0; j < sz; ++j)
- TEST_ASSERT(mem[j] == (char)i);
-
+ TEST_ASSERT_MEMSET(mem, (char)i, sz);
if ((i & 0xf) == 0)
msleep(20); /* Yield to other tasks */
}
@@ -384,6 +434,7 @@ void run_test(void)
RUN_TEST(test_parse_bool);
RUN_TEST(test_memmove);
RUN_TEST(test_memcpy);
+ RUN_TEST(test_memset);
RUN_TEST(test_strzcpy);
RUN_TEST(test_strlen);
RUN_TEST(test_strcasecmp);