diff options
author | Ali Saidi <alisaidi@amazon.com> | 2020-08-19 00:13:28 -0500 |
---|---|---|
committer | dormando <dormando@rydia.net> | 2020-10-27 17:54:21 -0700 |
commit | 9bb323ca3447421ee30ef26d1e48896d2d80b742 (patch) | |
tree | 85e42574ea1e877881d25ae35cdc1d4f6c754275 /crc32c.c | |
parent | eb1bc72b9497e2c0e77d66740dc3053b6855a89a (diff) | |
download | memcached-9bb323ca3447421ee30ef26d1e48896d2d80b742.tar.gz |
arm64: Re-add arm crc32c hw acceleration
Use the .arch_extension directive so that a config option and
special cflags aren't required. Add a few tests for both the
software and hardware implementations.
Diffstat (limited to 'crc32c.c')
-rw-r--r-- | crc32c.c | 65 |
1 files changed, 63 insertions, 2 deletions
@@ -48,7 +48,6 @@ crc_func crc32c;
 /* CRC-32C (iSCSI) polynomial in reversed bit order. */
 #define POLY 0x82f63b78
 
-uint32_t crc32c_sw(uint32_t crc, void const *buf, size_t len);
 uint32_t crc32c_sw_little(uint32_t crc, void const *buf, size_t len);
 uint32_t crc32c_sw_big(uint32_t crc, void const *buf, size_t len);
 #ifdef __x86_64__
@@ -274,8 +273,70 @@ void crc32c_init(void) {
     }
 }
 
-#else /* !__x86_64__ */
+#elif defined(__aarch64__) && defined(__linux__)
+#include <sys/auxv.h>
 
+static inline uint32_t crc32cx(uint32_t crc, const uint64_t data)
+{
+    asm(".arch_extension crc\n"
+        "crc32cx %w0, %w0, %x1" : "+r" (crc) : "r" (data));
+    return crc;
+}
+
+static inline uint32_t crc32cb(uint32_t crc, const uint8_t data)
+{
+    asm(".arch_extension crc\n"
+        "crc32cb %w0, %w0, %w1" : "+r" (crc) : "r" (data));
+    return crc;
+}
+
+static uint32_t crc32c_hw(uint32_t crc, void const *buf, size_t len) {
+    crc = ~crc;
+    unsigned char const *next = buf;
+
+    while (((uintptr_t)next & 7) && len > 0) {
+        crc = crc32cb(crc, *(uint8_t *)next);
+        next++;
+        len--;
+    }
+
+    while (len >= 64) {
+        uint64_t *next8 = (uint64_t *)next;
+        crc = crc32cx(crc, next8[0]);
+        crc = crc32cx(crc, next8[1]);
+        crc = crc32cx(crc, next8[2]);
+        crc = crc32cx(crc, next8[3]);
+        crc = crc32cx(crc, next8[4]);
+        crc = crc32cx(crc, next8[5]);
+        crc = crc32cx(crc, next8[6]);
+        crc = crc32cx(crc, next8[7]);
+        next += 64;
+        len -= 64;
+    }
+
+    while (len >= 8) {
+        crc = crc32cx(crc, *(uint64_t *)next);
+        next += 8;
+        len -= 8;
+    }
+
+    while (len > 0) {
+        crc = crc32cb(crc, *(uint8_t *)next);
+        next++;
+        len--;
+    }
+
+    return ~crc;
+}
+
+void crc32c_init(void) {
+    uint64_t auxv = getauxval(AT_HWCAP);
+
+    crc32c = crc32c_sw;
+    if (auxv & HWCAP_CRC32)
+        crc32c = crc32c_hw;
+}
+#else /* !__x86_64__i && !__aarch64__ */
 void crc32c_init(void) {
     crc32c = crc32c_sw;
 }