summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYagiz Nizipli <yagiz@nizipli.com>2023-05-14 15:42:38 -0400
committerYagiz Nizipli <yagiz@nizipli.com>2023-05-16 20:35:58 -0400
commitd573ea1e8a5aaa14d39fa3de9b6b75cf293ace29 (patch)
tree46d33e1f9377d34f55e26bce665975d0931ddfcf
parentc9ec72de450eb9db92d933900c3364c1c2742df8 (diff)
downloadnode-new-simd-bytelength.tar.gz
buffer: add SIMD Neon optimization for `byteLength`simd-bytelength
Co-authored-by: Keyhan Vakil <kvakil@sylph.kvakil.me> Co-authored-by: Daniel Lemire <daniel@lemire.me>
-rw-r--r--node.gyp1
-rw-r--r--src/node_buffer.cc11
-rw-r--r--src/node_simd.cc60
-rw-r--r--src/node_simd.h22
4 files changed, 86 insertions, 8 deletions
diff --git a/node.gyp b/node.gyp
index f9621fc1e1..9d4ad4a496 100644
--- a/node.gyp
+++ b/node.gyp
@@ -121,6 +121,7 @@
'src/node_report_utils.cc',
'src/node_sea.cc',
'src/node_serdes.cc',
+ 'src/node_simd.cc',
'src/node_shadow_realm.cc',
'src/node_snapshotable.cc',
'src/node_sockaddr.cc',
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
index ff041274f9..a2692479ad 100644
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -26,6 +26,7 @@
#include "node_external_reference.h"
#include "node_i18n.h"
#include "node_internals.h"
+#include "node_simd.h"
#include "env-inl.h"
#include "simdutf.h"
@@ -743,14 +744,8 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
uint32_t FastByteLengthUtf8(Local<Value> receiver,
const v8::FastOneByteString& source) {
- uint32_t result = 0;
- uint32_t length = source.length;
- const uint8_t* data = reinterpret_cast<const uint8_t*>(source.data);
- for (uint32_t i = 0; i < length; ++i) {
- result += (data[i] >> 7);
- }
- result += length;
- return result;
+ return node::simd::utf8_byte_length(
+ reinterpret_cast<const uint8_t*>(source.data), source.length);
}
static v8::CFunction fast_byte_length_utf8(
diff --git a/src/node_simd.cc b/src/node_simd.cc
new file mode 100644
index 0000000000..a5265a95c0
--- /dev/null
+++ b/src/node_simd.cc
@@ -0,0 +1,60 @@
+#include "node_simd.h"
+
+#include <string_view>
+
+#if NODE_HAS_SIMD_NEON
+#include <arm_neon.h>
+#endif
+
+namespace node {
+namespace simd {
+
+#if NODE_HAS_SIMD_NEON
+uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
+ uint64_t result{0};
+
+ const int lanes = sizeof(uint8x16_t);
+ const int max_sra_count = 256 / lanes; // Avoid overflowing vaddvq_u8.
+ const int unrolls = max_sra_count;
+ const int unrolled_lanes = lanes * unrolls;
+
+ const uint8_t* unroll_end = data + (length / unrolled_lanes) * unrolled_lanes;
+ uint32_t length_after_unroll = length % unrolled_lanes;
+ for (; data < unroll_end;) {
+ uint8x16_t acc = {};
+ for (int i = 0; i < unrolls; ++i, data += lanes) {
+ uint8x16_t chunk = vld1q_u8(data);
+ acc = vsraq_n_u8(acc, chunk, 7);
+ }
+ result += vaddvq_u8(acc);
+ }
+
+ const uint8_t* simd_end = data + (length_after_unroll / lanes) * lanes;
+ uint32_t length_after_simd = length % lanes;
+ uint8x16_t acc = {};
+ for (; data < simd_end; data += lanes) {
+ uint8x16_t chunk = vld1q_u8(data);
+ acc = vsraq_n_u8(acc, chunk, 7);
+ }
+ result += vaddvq_u8(acc);
+
+ const uint8_t* scalar_end = data + length_after_simd;
+ for (; data < scalar_end; data += 1) {
+ result += *data >> 7;
+ }
+
+ return result + length;
+}
+#else
+uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
+ uint32_t result = 0;
+ for (uint32_t i = 0; i < length; ++i) {
+ result += (data[i] >> 7);
+ }
+ result += length;
+ return result;
+}
+#endif
+
+} // namespace simd
+} // namespace node
diff --git a/src/node_simd.h b/src/node_simd.h
new file mode 100644
index 0000000000..24398683c6
--- /dev/null
+++ b/src/node_simd.h
@@ -0,0 +1,22 @@
+#ifndef SRC_NODE_SIMD_H_
+#define SRC_NODE_SIMD_H_
+
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define NODE_HAS_SIMD_NEON 1
+#endif
+
+#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#include <string_view>
+
+namespace node {
+namespace simd {
+
+uint32_t utf8_byte_length(const uint8_t* input, size_t length);
+
+} // namespace simd
+} // namespace node
+
+#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+
+#endif // SRC_NODE_SIMD_H_