summaryrefslogtreecommitdiff
path: root/src/mongo/db/fts
diff options
context:
space:
mode:
authorDaniel Stewart <daniel.stewart@linaro.org>2018-05-07 14:04:31 -0400
committerMark Benvenuto <mark.benvenuto@mongodb.com>2018-05-07 14:09:38 -0400
commitb1dbe3e36e9bc3f4a8f0533f7d5faee5baaa5f97 (patch)
tree15a7421bf15cdeeed20cbcfd87b1ea28b4e8bbb8 /src/mongo/db/fts
parent8a41a08818c38c4f79a1c4ba2dfe453e8e547dd0 (diff)
downloadmongo-b1dbe3e36e9bc3f4a8f0533f7d5faee5baaa5f97.tar.gz
SERVER-30870 Enable Unicode Fast Byte Vector Optimization for arm64
closes #1222
Diffstat (limited to 'src/mongo/db/fts')
-rw-r--r--src/mongo/db/fts/unicode/byte_vector.h2
-rw-r--r--src/mongo/db/fts/unicode/byte_vector_neon.h157
2 files changed, 159 insertions, 0 deletions
diff --git a/src/mongo/db/fts/unicode/byte_vector.h b/src/mongo/db/fts/unicode/byte_vector.h
index 2404e15dfda..e64f76bc8ae 100644
--- a/src/mongo/db/fts/unicode/byte_vector.h
+++ b/src/mongo/db/fts/unicode/byte_vector.h
@@ -35,6 +35,8 @@
#include "mongo/db/fts/unicode/byte_vector_sse2.h"
#elif defined(__powerpc64__)
#include "mongo/db/fts/unicode/byte_vector_altivec.h"
+#elif defined(__aarch64__)
+#include "mongo/db/fts/unicode/byte_vector_neon.h"
#else // Other platforms go above here.
#undef MONGO_HAVE_FAST_BYTE_VECTOR
#endif
diff --git a/src/mongo/db/fts/unicode/byte_vector_neon.h b/src/mongo/db/fts/unicode/byte_vector_neon.h
new file mode 100644
index 00000000000..f85946a2995
--- /dev/null
+++ b/src/mongo/db/fts/unicode/byte_vector_neon.h
@@ -0,0 +1,157 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <arm_neon.h>
+#include <cstdint>
+
+#include "mongo/platform/bits.h"
+
+namespace mongo {
+namespace unicode {
+
+/**
+ * A sequence of bytes that can be manipulated using vectorized instructions.
+ *
+ * This is specific to the use case in mongo::unicode::String and not intended as a general purpose
+ * vector class.
+ *
+ * This specialization offers acceleration for aarch64.
+ */
+class ByteVector {
+public:
+ using Native = uint8x16_t;
+ using Mask = uint16_t;
+ using Scalar = int8_t;
+ static const int size = sizeof(Native);
+
+ /**
+ * Sets all bytes to 0.
+ */
+ ByteVector() : _data(vdupq_n_u8(0)) {}
+
+ /**
+ * Sets all bytes to val.
+ */
+ explicit ByteVector(Scalar val) : _data(vdupq_n_u8(val)) {}
+
+ /**
+ * Load a vector from a potentially unaligned location.
+ */
+ static ByteVector load(const void* ptr) {
+ // This function is documented as taking an unaligned pointer.
+ return vld1q_u8(reinterpret_cast<const uint8_t*>(ptr));
+ }
+
+ /**
+ * Store this vector to a potentially unaligned location.
+ */
+ void store(void* ptr) const {
+ // This function is documented as taking an unaligned pointer.
+ vst1q_u8(reinterpret_cast<uint8_t*>(ptr), _data);
+ }
+
+ /**
+ * Returns a bitmask with the high bit from each byte.
+ */
+ Mask maskHigh() const {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wuninitialized"
+ uint64x1_t p;
+ // vset_lane_u64 initializes p but the compiler does not understand this and considers p
+ // uninitialized.
+ p = vset_lane_u64(0x8040201008040201, p, 0);
+#pragma GCC diagnostic pop
+ uint8x16_t powers = vcombine_u8(vreinterpret_u8_u64(p), vreinterpret_u8_u64(p));
+ int8x16_t zero8x16 = vdupq_n_s8(0);
+ uint8x16_t input = vcltq_s8(vreinterpretq_s8_u8(_data), zero8x16);
+ uint64x2_t mask = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(input, powers))));
+ uint16_t output;
+ output = ((vgetq_lane_u8(vreinterpretq_u8_u64(mask), 8) << 8) |
+ (vgetq_lane_u8(vreinterpretq_u8_u64(mask), 0) << 0));
+ return output;
+ }
+
+ /**
+ * Returns a bitmask with any bit from each byte.
+ *
+ * This operation only makes sense if all bytes are either 0x00 or 0xff, such as the result from
+ * comparison operations.
+ */
+ Mask maskAny() const {
+ return maskHigh(); // Other archs may be more efficient here.
+ }
+
+ /**
+ * Counts zero bits in mask from whichever side corresponds to the lowest memory address.
+ */
+ static uint32_t countInitialZeros(Mask mask) {
+ return mask == 0 ? size : countTrailingZeros64(mask);
+ }
+
+ /**
+ * Sets each byte to 0xff if it is ==(EQ), <(LT), or >(GT), otherwise 0x00.
+ *
+ * May use either signed or unsigned comparisons since this use case doesn't care about bytes
+ * with high bit set.
+ */
+ ByteVector compareEQ(Scalar val) const {
+ return vceqq_u8(_data, ByteVector(val)._data);
+ }
+ ByteVector compareLT(Scalar val) const {
+ return vcltq_u8(_data, ByteVector(val)._data);
+ }
+ ByteVector compareGT(Scalar val) const {
+ return vcgtq_u8(_data, ByteVector(val)._data);
+ }
+
+ ByteVector operator|(ByteVector other) const {
+ return vorrq_u8(_data, other._data);
+ }
+
+ ByteVector& operator|=(ByteVector other) {
+ return (*this = (*this | other));
+ }
+
+ ByteVector operator&(ByteVector other) const {
+ return vandq_u8(_data, other._data);
+ }
+
+ ByteVector& operator&=(ByteVector other) {
+ return (*this = (*this & other));
+ }
+
+private:
+ ByteVector(Native data) : _data(data) {}
+
+ Native _data;
+};
+
+} // namespace unicode
+} // namespace mongo