From 6a2b78a379e4a6ca11eaacb3e26bea397a46d74b Mon Sep 17 00:00:00 2001
From: Snappy Team
Date: Wed, 27 Apr 2022 15:16:35 +0000
Subject: Optimize Zippy compression for ARM by 5-10% by choosing csel instructions

PiperOrigin-RevId: 444863689
---
 snappy-internal.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/snappy-internal.h b/snappy-internal.h
index ae7ab5a..e552ea0 100644
--- a/snappy-internal.h
+++ b/snappy-internal.h
@@ -230,8 +230,9 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
       uint64_t xorval = a1 ^ a2;
       int shift = Bits::FindLSBSetNonZero64(xorval);
       size_t matched_bytes = shift >> 3;
+      uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
 #ifndef __x86_64__
-      *data = UNALIGNED_LOAD64(s2 + matched_bytes);
+      a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
 #else
       // Ideally this would just be
       //
@@ -242,13 +243,12 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
       // use a conditional move (it's tuned to cut data dependencies). In this
       // case there is a longer parallel chain anyway AND this will be fairly
       // unpredictable.
-      uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
       asm("testl %k2, %k2\n\t"
           "cmovzq %1, %0\n\t"
           : "+r"(a2)
           : "r"(a3), "r"(xorval));
-      *data = a2 >> (shift & (3 * 8));
 #endif
+      *data = a2 >> (shift & (3 * 8));
       return std::pair<size_t, bool>(matched_bytes, true);
     } else {
       matched = 8;
@@ -270,16 +270,16 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
       uint64_t xorval = a1 ^ a2;
       int shift = Bits::FindLSBSetNonZero64(xorval);
       size_t matched_bytes = shift >> 3;
+      uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
 #ifndef __x86_64__
-      *data = UNALIGNED_LOAD64(s2 + matched_bytes);
+      a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
 #else
-      uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
       asm("testl %k2, %k2\n\t"
           "cmovzq %1, %0\n\t"
           : "+r"(a2)
           : "r"(a3), "r"(xorval));
-      *data = a2 >> (shift & (3 * 8));
 #endif
+      *data = a2 >> (shift & (3 * 8));
       matched += matched_bytes;
       assert(matched >= 8);
       return std::pair<size_t, bool>(matched, false);
--
cgit v1.2.1
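
For context, a minimal standalone sketch (not part of the patch) of the pattern this change relies on. The helper names load_u64 and select_match_data are hypothetical, and UNALIGNED_LOAD64 is approximated with a memcpy-based load. On the non-x86 path the new ternary select typically lowers to a csel on AArch64, which is the 5-10% gain the subject line refers to; the old code instead issued a second unaligned load at a data-dependent address.

// Standalone sketch, hypothetical names; mirrors the non-x86 path in the
// patch above rather than reproducing snappy's actual headers.
#include <cstdint>
#include <cstring>

// Stand-in for snappy's UNALIGNED_LOAD64 (assumed memcpy-based load).
static inline uint64_t load_u64(const char* p) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

// Selects the match data without a branch and without an address that
// depends on the comparison result: if the low 32 bits of xorval are zero
// (first four bytes matched), take the load at s2 + 4, otherwise keep the
// load at s2, then shift as in the patch. Compilers for AArch64 typically
// emit a csel for the ternary.
static inline uint64_t select_match_data(const char* s2, uint64_t xorval,
                                         int shift) {
  uint64_t a2 = load_u64(s2);
  uint64_t a3 = load_u64(s2 + 4);
  a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;  // branchless select
  return a2 >> (shift & (3 * 8));
}

The x86-64 path keeps the inline asm cmov because, as the retained comment explains, clang refuses to emit a conditional move for the ternary there; on ARM no such workaround is needed, so the plain C++ select is enough.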