summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMathias Stearn <mathias@10gen.com>2016-02-24 19:55:22 -0500
committerMathias Stearn <mathias@10gen.com>2016-03-11 08:50:18 -0500
commit67eee08bb606537df7417670d423c0527dd6221f (patch)
treecf3cde1244e74fac2950edd2c9f636947ea1d15f
parent6c3157f126bb44ab275325e85de7abee5ce9ad6d (diff)
downloadmongo-67eee08bb606537df7417670d423c0527dd6221f.tar.gz
SERVER-19936 Vector-optimize FTS phrase matches
Now handles up to 16 bytes of ASCII at a time if SSE2 is enabled.
-rw-r--r--src/mongo/db/fts/unicode/SConscript9
-rw-r--r--src/mongo/db/fts/unicode/byte_vector.h38
-rw-r--r--src/mongo/db/fts/unicode/byte_vector_sse2.h141
-rw-r--r--src/mongo/db/fts/unicode/byte_vector_test.cpp183
-rw-r--r--src/mongo/db/fts/unicode/string.cpp41
5 files changed, 412 insertions, 0 deletions
diff --git a/src/mongo/db/fts/unicode/SConscript b/src/mongo/db/fts/unicode/SConscript
index e4dfdda8171..dc9980ec50d 100644
--- a/src/mongo/db/fts/unicode/SConscript
+++ b/src/mongo/db/fts/unicode/SConscript
@@ -69,3 +69,12 @@ env.CppUnitTest(
'unicode',
]
)
+
+env.CppUnitTest(
+ target='byte_vector_test',
+ source=[
+ 'byte_vector_test.cpp'
+ ],
+ LIBDEPS=[
+ ]
+)
diff --git a/src/mongo/db/fts/unicode/byte_vector.h b/src/mongo/db/fts/unicode/byte_vector.h
new file mode 100644
index 00000000000..d361a857ff1
--- /dev/null
+++ b/src/mongo/db/fts/unicode/byte_vector.h
@@ -0,0 +1,38 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#define MONGO_HAVE_FAST_BYTE_VECTOR
+
+// TODO replace this with #if BOOST_HW_SIMD_X86 >= BOOST_HW_SIMD_X86_SSE2_VERSION in boost 1.60
+#if defined(_M_AMD64) || defined(__amd64__)
+#include "mongo/db/fts/unicode/byte_vector_sse2.h"
+#else // Other platforms go above here.
+#undef MONGO_HAVE_FAST_BYTE_VECTOR
+#endif
diff --git a/src/mongo/db/fts/unicode/byte_vector_sse2.h b/src/mongo/db/fts/unicode/byte_vector_sse2.h
new file mode 100644
index 00000000000..6262488085a
--- /dev/null
+++ b/src/mongo/db/fts/unicode/byte_vector_sse2.h
@@ -0,0 +1,141 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <emmintrin.h>
+
+#include "mongo/platform/bits.h"
+
+namespace mongo {
+namespace unicode {
+
+/**
+ * A sequence of bytes that can be manipulated using vectorized instructions.
+ *
+ * This is specific to the use case in mongo::unicode::String and not intended as a general purpose
+ * vector class.
+ */
+class ByteVector {
+public:
+ using Native = __m128i;
+ using Mask = uint32_t; // should be uint16_t but better codegen with uint32_t.
+ using Scalar = int8_t;
+ static const int size = sizeof(Native);
+
+ /**
+ * Sets all bytes to 0.
+ */
+ ByteVector() : _data(_mm_setzero_si128()) {}
+
+ /**
+ * Sets all bytes to val.
+ */
+ explicit ByteVector(Scalar val) : _data(_mm_set1_epi8(val)) {}
+
+ /**
+ * Load a vector from a potentially unaligned location.
+ */
+ static ByteVector load(const void* ptr) {
+ // This function is documented as taking an unaligned pointer.
+ return _mm_loadu_si128(reinterpret_cast<const Native*>(ptr));
+ }
+
+ /**
+ * Store this vector to a potentially unaligned location.
+ */
+ void store(void* ptr) const {
+ // This function is documented as taking an unaligned pointer.
+ _mm_storeu_si128(reinterpret_cast<Native*>(ptr), _data);
+ }
+
+ /**
+ * Returns a bitmask with the high bit from each byte.
+ */
+ Mask maskHigh() const {
+ return _mm_movemask_epi8(_data);
+ }
+
+ /**
+ * Returns a bitmask with any bit from each byte.
+ *
+ * This operation only makes sense if all bytes are either 0x00 or 0xff, such as the result from
+ * comparison operations.
+ */
+ Mask maskAny() const {
+ return maskHigh(); // Other archs may be more efficient here.
+ }
+
+ /**
+ * Counts zero bits in mask from whichever side corresponds to the lowest memory address.
+ */
+ static uint32_t countInitialZeros(Mask mask) {
+ return mask == 0 ? size : countTrailingZeros64(mask);
+ }
+
+ /**
+ * Sets each byte to 0xff if it is ==(EQ), <(LT), or >(GT), otherwise 0x00.
+ *
+ * May use either signed or unsigned comparisons since this use case doesn't care about bytes
+ * with high bit set.
+ */
+ ByteVector compareEQ(Scalar val) const {
+ return _mm_cmpeq_epi8(_data, ByteVector(val)._data);
+ }
+ ByteVector compareLT(Scalar val) const {
+ return _mm_cmplt_epi8(_data, ByteVector(val)._data);
+ }
+ ByteVector compareGT(Scalar val) const {
+ return _mm_cmpgt_epi8(_data, ByteVector(val)._data);
+ }
+
+ ByteVector operator|(ByteVector other) const {
+ return _mm_or_si128(_data, other._data);
+ }
+
+ ByteVector& operator|=(ByteVector other) {
+ return (*this = (*this | other));
+ }
+
+ ByteVector operator&(ByteVector other) const {
+ return _mm_and_si128(_data, other._data);
+ }
+
+ ByteVector& operator&=(ByteVector other) {
+ return (*this = (*this & other));
+ }
+
+private:
+ ByteVector(Native data) : _data(data) {}
+
+ Native _data;
+};
+
+} // namespace unicode
+} // namespace mongo
diff --git a/src/mongo/db/fts/unicode/byte_vector_test.cpp b/src/mongo/db/fts/unicode/byte_vector_test.cpp
new file mode 100644
index 00000000000..bab382ab8e5
--- /dev/null
+++ b/src/mongo/db/fts/unicode/byte_vector_test.cpp
@@ -0,0 +1,183 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include <cstring>
+#include <iterator>
+#include <numeric>
+
+#include "mongo/db/fts/unicode/byte_vector.h"
+#include "mongo/unittest/unittest.h"
+
+#ifdef MONGO_HAVE_FAST_BYTE_VECTOR
+namespace mongo {
+namespace unicode {
+
+TEST(ByteVector, LoadStoreUnaligned) {
+ uint8_t inputBuf[ByteVector::size * 2];
+ uint8_t outputBuf[ByteVector::size * 2];
+ std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+ // Try loads and stores at all possible (mis)alignments.
+ for (size_t offset = 0; offset < ByteVector::size; offset++) {
+ std::memset(outputBuf, 0, sizeof(outputBuf));
+ ByteVector::load(inputBuf + offset).store(outputBuf + offset);
+
+ for (size_t i = 0; i < ByteVector::size; i++) {
+ ASSERT_EQ(outputBuf[offset + i], inputBuf[offset + i]);
+ }
+ }
+}
+
+TEST(ByteVector, Splat) {
+ uint8_t outputBuf[ByteVector::size] = {};
+ ByteVector(0x12).store(outputBuf);
+
+ for (size_t i = 0; i < ByteVector::size; i++) {
+ ASSERT_EQ(outputBuf[i], 0x12);
+ }
+}
+
+TEST(ByteVector, MaskAny) {
+ uint8_t inputBuf[ByteVector::size];
+ std::memset(inputBuf, 0xFF, sizeof(inputBuf));
+ for (size_t offset = 0; offset <= ByteVector::size; offset++) {
+ auto mask = ByteVector::load(inputBuf).maskAny();
+ ASSERT_EQ(ByteVector::countInitialZeros(mask), offset);
+ if (offset < ByteVector::size) {
+ inputBuf[offset] = 0; // Add an initial 0 for the next loop.
+ }
+ }
+}
+
+TEST(ByteVector, MaskHigh) {
+ uint8_t inputBuf[ByteVector::size];
+ std::memset(inputBuf, 0x80, sizeof(inputBuf));
+ for (size_t offset = 0; offset <= ByteVector::size; offset++) {
+ auto mask = ByteVector::load(inputBuf).maskHigh();
+ ASSERT_EQ(ByteVector::countInitialZeros(mask), offset);
+ if (offset < ByteVector::size) {
+ inputBuf[offset] = 0x7f; // Add an initial 0 bit for the next loop.
+ }
+ }
+}
+
+TEST(ByteVector, CompareEQ) {
+ uint8_t inputBuf[ByteVector::size];
+ uint8_t outputBuf[ByteVector::size] = {};
+ std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+ ByteVector::load(inputBuf).compareEQ(3).store(outputBuf);
+
+ for (size_t i = 0; i < ByteVector::size; i++) {
+ ASSERT_EQ(outputBuf[i], inputBuf[i] == 3 ? 0xFF : 0x00);
+ }
+}
+
+TEST(ByteVector, CompareGT) {
+ uint8_t inputBuf[ByteVector::size];
+ uint8_t outputBuf[ByteVector::size] = {};
+ std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+ ByteVector::load(inputBuf).compareGT(3).store(outputBuf);
+
+ for (size_t i = 0; i < ByteVector::size; i++) {
+ ASSERT_EQ(outputBuf[i], inputBuf[i] > 3 ? 0xFF : 0x00);
+ }
+}
+
+TEST(ByteVector, CompareLT) {
+ uint8_t inputBuf[ByteVector::size];
+ uint8_t outputBuf[ByteVector::size] = {};
+ std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+ ByteVector::load(inputBuf).compareLT(3).store(outputBuf);
+
+ for (size_t i = 0; i < ByteVector::size; i++) {
+ ASSERT_EQ(outputBuf[i], inputBuf[i] < 3 ? 0xFF : 0x00);
+ }
+}
+
+TEST(ByteVector, BitOr) {
+ uint8_t inputBuf[ByteVector::size];
+ uint8_t outputBuf[ByteVector::size] = {};
+ std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+ (ByteVector::load(inputBuf) | ByteVector(0x1)).store(outputBuf);
+
+ for (size_t i = 0; i < ByteVector::size; i++) {
+ ASSERT_EQ(outputBuf[i], inputBuf[i] | 1);
+ }
+}
+
+TEST(ByteVector, BitOrAssign) {
+ uint8_t inputBuf[ByteVector::size];
+ uint8_t outputBuf[ByteVector::size] = {};
+ std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+ auto vec = ByteVector::load(inputBuf);
+ vec |= ByteVector(2);
+ vec.store(outputBuf);
+
+ for (size_t i = 0; i < ByteVector::size; i++) {
+ ASSERT_EQ(outputBuf[i], inputBuf[i] | 2);
+ }
+}
+
+TEST(ByteVector, BitAnd) {
+ uint8_t inputBuf[ByteVector::size];
+ uint8_t outputBuf[ByteVector::size] = {};
+ std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+ (ByteVector::load(inputBuf) & ByteVector(2)).store(outputBuf);
+
+ for (size_t i = 0; i < ByteVector::size; i++) {
+ ASSERT_EQ(outputBuf[i], inputBuf[i] & 2);
+ }
+}
+
+TEST(ByteVector, BitAndAssign) {
+ uint8_t inputBuf[ByteVector::size];
+ uint8_t outputBuf[ByteVector::size] = {};
+ std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+ auto vec = ByteVector::load(inputBuf);
+ vec &= ByteVector(2);
+ vec.store(outputBuf);
+
+ for (size_t i = 0; i < ByteVector::size; i++) {
+ ASSERT_EQ(outputBuf[i], inputBuf[i] & 2);
+ }
+}
+
+} // namespace unicode
+} // namespace mongo
+#else
+// Our unittest framework gets angry if there are no tests. If we don't have ByteVector, give it a
+// dummy test to make it happy.
+TEST(ByteVector, ByteVectorNotSupportedOnThisPlatform) {}
+#endif
diff --git a/src/mongo/db/fts/unicode/string.cpp b/src/mongo/db/fts/unicode/string.cpp
index 0ea37d8f33f..483cb7b22eb 100644
--- a/src/mongo/db/fts/unicode/string.cpp
+++ b/src/mongo/db/fts/unicode/string.cpp
@@ -31,6 +31,7 @@
#include <algorithm>
#include <boost/algorithm/searching/boyer_moore.hpp>
+#include "mongo/db/fts/unicode/byte_vector.h"
#include "mongo/platform/bits.h"
#include "mongo/shell/linenoise_utf8.h"
#include "mongo/util/assert_util.h"
@@ -171,6 +172,46 @@ std::pair<std::unique_ptr<char[]>, char*> String::prepForSubstrMatch(StringData
auto outputIt = buffer.get();
for (auto inputIt = utf8.begin(), endIt = utf8.end(); inputIt != endIt;) {
+#ifdef MONGO_HAVE_FAST_BYTE_VECTOR
+ if (size_t(endIt - inputIt) >= ByteVector::size) {
+ // Try the fast path for 16 contiguous bytes of ASCII.
+ auto word = ByteVector::load(&*inputIt);
+
+ // Count the bytes of ASCII.
+ uint32_t usableBytes = ByteVector::countInitialZeros(word.maskHigh());
+ if (usableBytes) {
+ if (!(options & kCaseSensitive)) {
+ if (mode == CaseFoldMode::kTurkish) {
+ ByteVector::Mask iMask = word.compareEQ('I').maskAny();
+ if (iMask) {
+ usableBytes =
+ std::min(usableBytes, ByteVector::countInitialZeros(iMask));
+ }
+ }
+ // 0xFF for each byte in word that is uppercase, 0x00 for all others.
+ ByteVector uppercaseMask = word.compareGT('A' - 1) & word.compareLT('Z' + 1);
+ word |= (uppercaseMask & ByteVector(0x20)); // Set the ascii lowercase bit.
+ }
+
+ if (!(options & kDiacriticSensitive)) {
+ ByteVector::Mask diacriticMask =
+ word.compareEQ('^').maskAny() | word.compareEQ('`').maskAny();
+ if (diacriticMask) {
+ usableBytes =
+ std::min(usableBytes, ByteVector::countInitialZeros(diacriticMask));
+ }
+ }
+
+ word.store(&*outputIt);
+ outputIt += usableBytes;
+ inputIt += usableBytes;
+ if (usableBytes == ByteVector::size)
+ continue;
+ }
+ // If we get here, inputIt is positioned on a byte that we know needs special handling.
+ // Either it isn't ASCII or it is a diacritic that needs to be stripped.
+ }
+#endif
const uint8_t firstByte = *inputIt++;
char32_t codepoint = 0;
if (firstByte <= 0x7f) {