summary | refs | log | tree | commit | diff
path: root/libc
diff options
context:
space:
mode:
Diffstat (limited to 'libc')
-rw-r--r-- libc/src/string/memory_utils/bcmp_implementations.h 67
-rw-r--r-- libc/src/string/memory_utils/utils.h 1
2 files changed, 63 insertions, 5 deletions
diff --git a/libc/src/string/memory_utils/bcmp_implementations.h b/libc/src/string/memory_utils/bcmp_implementations.h
index 1c238cb88398..070e7794102f 100644
--- a/libc/src/string/memory_utils/bcmp_implementations.h
+++ b/libc/src/string/memory_utils/bcmp_implementations.h
@@ -22,14 +22,67 @@
namespace __llvm_libc {
[[maybe_unused]] LIBC_INLINE BcmpReturnType
-inline_bcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
+inline_bcmp_byte_per_byte(CPtr p1, CPtr p2, size_t offset, size_t count) {
LIBC_LOOP_NOUNROLL
- for (size_t offset = 0; offset < count; ++offset)
- if (auto value = generic::Bcmp<1>::block(p1 + offset, p2 + offset))
- return value;
+ for (; offset < count; ++offset)
+ if (p1[offset] != p2[offset])
+ return BcmpReturnType::NONZERO();
return BcmpReturnType::ZERO();
}
+// Compares `count` bytes at `p1` and `p2`, reading `p1` through 64-bit loads
+// and `p2` through a sequence of narrower loads picked from its misalignment.
+// Returns BcmpReturnType::ZERO() when all bytes match and
+// BcmpReturnType::NONZERO() as soon as a difference is found.
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aligned_access_64bit(CPtr p1, CPtr p2, size_t count) {
+  constexpr size_t kAlign = sizeof(uint64_t);
+  // Inputs of at most two words are compared one byte at a time. This also
+  // guarantees `count - kAlign` below cannot wrap around.
+  if (count <= 2 * kAlign)
+    return inline_bcmp_byte_per_byte(p1, p2, 0, count);
+  // Compare the leading bytes one by one; presumably this leaves `p1 + offset`
+  // aligned on kAlign (confirm against distance_to_align_up in utils.h).
+  size_t bytes_to_p1_align = distance_to_align_up<kAlign>(p1);
+  if (auto value = inline_bcmp_byte_per_byte(p1, p2, 0, bytes_to_p1_align))
+    return value;
+  size_t offset = bytes_to_p1_align;
+  // Computed once: `offset` only advances by kAlign, so the misalignment of
+  // `p2 + offset` modulo kAlign does not change across iterations.
+  size_t p2_alignment = distance_to_align_down<kAlign>(p2 + offset);
+  for (; offset < count - kAlign; offset += kAlign) {
+    uint64_t a;
+    if (p2_alignment == 0)
+      a = load64_aligned<uint64_t>(p2, offset);
+    else if (p2_alignment == 4)
+      a = load64_aligned<uint32_t, uint32_t>(p2, offset);
+    else if (p2_alignment % 2 == 0)
+      // Covers the remaining even misalignments (2 and 6). A misalignment of
+      // 6 must not reach the odd-address sequence below, which would start
+      // its uint16_t pieces on an odd address (assuming load64_aligned needs
+      // every piece to be naturally aligned).
+      a = load64_aligned<uint16_t, uint16_t, uint16_t, uint16_t>(p2, offset);
+    else
+      // Odd misalignment: one byte reaches an even address, three halfwords
+      // follow, and a final byte completes the 8 bytes.
+      a = load64_aligned<uint8_t, uint16_t, uint16_t, uint16_t, uint8_t>(
+          p2, offset);
+    uint64_t b = load64_aligned<uint64_t>(p1, offset);
+    if (a != b)
+      return BcmpReturnType::NONZERO();
+  }
+  // Finish the bytes the word loop did not cover.
+  return inline_bcmp_byte_per_byte(p1, p2, offset, count);
+}
+
+// Compares `count` bytes at `p1` and `p2`, reading `p1` through 32-bit loads
+// and `p2` through a sequence of narrower loads picked from its misalignment.
+// Returns BcmpReturnType::ZERO() when all bytes match and
+// BcmpReturnType::NONZERO() as soon as a difference is found.
+[[maybe_unused]] LIBC_INLINE BcmpReturnType
+inline_bcmp_aligned_access_32bit(CPtr p1, CPtr p2, size_t count) {
+  constexpr size_t kAlign = sizeof(uint32_t);
+  // Inputs of at most two words are handled entirely by the byte loop.
+  if (count <= 2 * kAlign)
+    return inline_bcmp_byte_per_byte(p1, p2, 0, count);
+  // Byte-wise comparison of the leading bytes before the word loop starts.
+  const size_t head_bytes = distance_to_align_up<kAlign>(p1);
+  if (auto head_result = inline_bcmp_byte_per_byte(p1, p2, 0, head_bytes))
+    return head_result;
+  size_t offset = head_bytes;
+  // Evaluated once: `offset` moves in steps of kAlign, so this value stays
+  // the same for every iteration.
+  const size_t p2_misalign = distance_to_align_down<kAlign>(p2 + offset);
+  while (offset < count - kAlign) {
+    uint32_t p2_word;
+    switch (p2_misalign) {
+    case 0:
+      p2_word = load32_aligned<uint32_t>(p2, offset);
+      break;
+    case 2:
+      p2_word = load32_aligned<uint16_t, uint16_t>(p2, offset);
+      break;
+    default:
+      // Remaining (odd) misalignments: byte, halfword, byte.
+      p2_word = load32_aligned<uint8_t, uint16_t, uint8_t>(p2, offset);
+      break;
+    }
+    const uint32_t p1_word = load32_aligned<uint32_t>(p1, offset);
+    if (p2_word != p1_word)
+      return BcmpReturnType::NONZERO();
+    offset += kAlign;
+  }
+  // Finish the bytes the word loop did not cover.
+  return inline_bcmp_byte_per_byte(p1, p2, offset, count);
+}
+
#if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
[[maybe_unused]] LIBC_INLINE BcmpReturnType
inline_bcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
@@ -167,8 +220,12 @@ LIBC_INLINE BcmpReturnType inline_bcmp(CPtr p1, CPtr p2, size_t count) {
return inline_bcmp_x86(p1, p2, count);
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
return inline_bcmp_aarch64(p1, p2, count);
+#elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
+ return inline_bcmp_aligned_access_64bit(p1, p2, count);
+#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
+ return inline_bcmp_aligned_access_32bit(p1, p2, count);
#else
- return inline_bcmp_embedded_tiny(p1, p2, count);
+ return inline_bcmp_byte_per_byte(p1, p2, 0, count);
#endif
}
diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h
index ab33331847af..d5be52b73eb8 100644
--- a/libc/src/string/memory_utils/utils.h
+++ b/libc/src/string/memory_utils/utils.h
@@ -140,6 +140,7 @@ template <typename T> struct StrictIntegralType {
// Helper to get the zero value.
LIBC_INLINE static constexpr StrictIntegralType ZERO() { return {T(0)}; }
+ LIBC_INLINE static constexpr StrictIntegralType NONZERO() { return {T(1)}; }
private:
T value;