diff options
author | Yagiz Nizipli <yagiz@nizipli.com> | 2023-05-14 15:42:38 -0400 |
---|---|---|
committer | Yagiz Nizipli <yagiz@nizipli.com> | 2023-05-16 20:35:58 -0400 |
commit | d573ea1e8a5aaa14d39fa3de9b6b75cf293ace29 (patch) | |
tree | 46d33e1f9377d34f55e26bce665975d0931ddfcf | |
parent | c9ec72de450eb9db92d933900c3364c1c2742df8 (diff) | |
download | node-new-simd-bytelength.tar.gz |
buffer: add SIMD Neon optimization for `byteLength` (branch: simd-bytelength)
Co-authored-by: Keyhan Vakil <kvakil@sylph.kvakil.me>
Co-authored-by: Daniel Lemire <daniel@lemire.me>
-rw-r--r-- | node.gyp | 1 | ||||
-rw-r--r-- | src/node_buffer.cc | 11 | ||||
-rw-r--r-- | src/node_simd.cc | 60 | ||||
-rw-r--r-- | src/node_simd.h | 22 |
4 files changed, 86 insertions, 8 deletions
diff --git a/node.gyp b/node.gyp
--- a/node.gyp
+++ b/node.gyp
@@ -121,6 +121,7 @@
       'src/node_report_utils.cc',
       'src/node_sea.cc',
       'src/node_serdes.cc',
+      'src/node_simd.cc',
       'src/node_shadow_realm.cc',
       'src/node_snapshotable.cc',
       'src/node_sockaddr.cc',
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index ff041274f9..a2692479ad 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -26,6 +26,7 @@
 #include "node_external_reference.h"
 #include "node_i18n.h"
 #include "node_internals.h"
+#include "node_simd.h"
 #include "env-inl.h"
 #include "simdutf.h"
 
@@ -743,14 +744,8 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
 
 uint32_t FastByteLengthUtf8(Local<Value> receiver,
                             const v8::FastOneByteString& source) {
-  uint32_t result = 0;
-  uint32_t length = source.length;
-  const uint8_t* data = reinterpret_cast<const uint8_t*>(source.data);
-  for (uint32_t i = 0; i < length; ++i) {
-    result += (data[i] >> 7);
-  }
-  result += length;
-  return result;
+  return node::simd::utf8_byte_length(
+      reinterpret_cast<const uint8_t*>(source.data), source.length);
 }
 
 static v8::CFunction fast_byte_length_utf8(
diff --git a/src/node_simd.cc b/src/node_simd.cc
new file mode 100644
index 0000000000..a5265a95c0
--- /dev/null
+++ b/src/node_simd.cc
@@ -0,0 +1,60 @@
+#include "node_simd.h"
+
+#include <string_view>
+
+#if NODE_HAS_SIMD_NEON
+#include <arm_neon.h>
+#endif
+
+namespace node {
+namespace simd {
+
+#if NODE_HAS_SIMD_NEON
+uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
+  uint64_t result{0};
+
+  const int lanes = sizeof(uint8x16_t);
+  const int max_sra_count = 256 / lanes;  // Caps each u8 lane count at 16.
+  const int unrolls = max_sra_count;
+  const int unrolled_lanes = lanes * unrolls;
+
+  const uint8_t* unroll_end = data + (length / unrolled_lanes) * unrolled_lanes;
+  uint32_t length_after_unroll = length % unrolled_lanes;
+  for (; data < unroll_end;) {
+    uint8x16_t acc = {};
+    for (int i = 0; i < unrolls; ++i, data += lanes) {
+      uint8x16_t chunk = vld1q_u8(data);
+      acc = vsraq_n_u8(acc, chunk, 7);
+    }
+    result += vaddlvq_u8(acc);  // Widen: 16 lanes * 16 would wrap a u8 sum.
+  }
+
+  const uint8_t* simd_end = data + (length_after_unroll / lanes) * lanes;
+  uint32_t length_after_simd = length % lanes;
+  uint8x16_t acc = {};
+  for (; data < simd_end; data += lanes) {
+    uint8x16_t chunk = vld1q_u8(data);
+    acc = vsraq_n_u8(acc, chunk, 7);
+  }
+  result += vaddlvq_u8(acc);
+
+  const uint8_t* scalar_end = data + length_after_simd;
+  for (; data < scalar_end; data += 1) {
+    result += *data >> 7;
+  }
+
+  return result + length;
+}
+#else
+uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
+  uint32_t result = 0;
+  for (size_t i = 0; i < length; ++i) {
+    result += (data[i] >> 7);
+  }
+  result += length;
+  return result;
+}
+#endif
+
+}  // namespace simd
+}  // namespace node
diff --git a/src/node_simd.h b/src/node_simd.h
new file mode 100644
index 0000000000..24398683c6
--- /dev/null
+++ b/src/node_simd.h
@@ -0,0 +1,22 @@
+#ifndef SRC_NODE_SIMD_H_
+#define SRC_NODE_SIMD_H_
+
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define NODE_HAS_SIMD_NEON 1
+#endif
+
+#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#include <string_view>
+
+namespace node {
+namespace simd {
+
+uint32_t utf8_byte_length(const uint8_t* input, size_t length);
+
+}  // namespace simd
+}  // namespace node
+
+#endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#endif  // SRC_NODE_SIMD_H_