SERVER-19936 Vector-optimize FTS phrase matches

Now handles up to 16 bytes of ASCII at a time if SSE2 is enabled.
author: Mathias Stearn <mathias@10gen.com> 2016-02-24 19:55:22 -0500
committer: Mathias Stearn <mathias@10gen.com> 2016-03-11 08:50:18 -0500
commit: 67eee08bb606537df7417670d423c0527dd6221f (patch)
tree: cf3cde1244e74fac2950edd2c9f636947ea1d15f
parent: 6c3157f126bb44ab275325e85de7abee5ce9ad6d (diff)
download: mongo-67eee08bb606537df7417670d423c0527dd6221f.tar.gz
5 files changed, 412 insertions, 0 deletions
diff --git a/src/mongo/db/fts/unicode/SConscript b/src/mongo/db/fts/unicode/SConscript
index e4dfdda8171..dc9980ec50d 100644
--- a/src/mongo/db/fts/unicode/SConscript
+++ b/src/mongo/db/fts/unicode/SConscript
@@ -69,3 +69,12 @@ env.CppUnitTest(
         'unicode', 
     ]
 )
+
+env.CppUnitTest(
+    target='byte_vector_test',
+    source=[
+        'byte_vector_test.cpp'
+    ],
+    LIBDEPS=[
+    ]
+)
diff --git a/src/mongo/db/fts/unicode/byte_vector.h b/src/mongo/db/fts/unicode/byte_vector.h
new file mode 100644
index 00000000000..d361a857ff1
--- /dev/null
+++ b/src/mongo/db/fts/unicode/byte_vector.h
@@ -0,0 +1,38 @@
+/**
+ *    Copyright (C) 2016 MongoDB Inc.
+ *
+ *    This program is free software: you can redistribute it and/or  modify
+ *    it under the terms of the GNU Affero General Public License, version 3,
+ *    as published by the Free Software Foundation.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU Affero General Public License for more details.
+ *
+ *    You should have received a copy of the GNU Affero General Public License
+ *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the GNU Affero General Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#pragma once
+
+#define MONGO_HAVE_FAST_BYTE_VECTOR
+
+// TODO replace this with #if BOOST_HW_SIMD_X86 >= BOOST_HW_SIMD_X86_SSE2_VERSION in boost 1.60
+#if defined(_M_AMD64) || defined(__amd64__)
+#include "mongo/db/fts/unicode/byte_vector_sse2.h"
+#else  // Other platforms go above here.
+#undef MONGO_HAVE_FAST_BYTE_VECTOR
+#endif
diff --git a/src/mongo/db/fts/unicode/byte_vector_sse2.h b/src/mongo/db/fts/unicode/byte_vector_sse2.h
new file mode 100644
index 00000000000..6262488085a
--- /dev/null
+++ b/src/mongo/db/fts/unicode/byte_vector_sse2.h
@@ -0,0 +1,141 @@
+/**
+ *    Copyright (C) 2016 MongoDB Inc.
+ *
+ *    This program is free software: you can redistribute it and/or  modify
+ *    it under the terms of the GNU Affero General Public License, version 3,
+ *    as published by the Free Software Foundation.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU Affero General Public License for more details.
+ *
+ *    You should have received a copy of the GNU Affero General Public License
+ *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the GNU Affero General Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <emmintrin.h>
+
+#include "mongo/platform/bits.h"
+
+namespace mongo {
+namespace unicode {
+
+/**
+ * A sequence of bytes that can be manipulated using vectorized instructions.
+ *
+ * This is specific to the use case in mongo::unicode::String and not intended as a general purpose
+ * vector class.
+ */
+class ByteVector {
+public:
+    using Native = __m128i;
+    using Mask = uint32_t;  // should be uint16_t but better codegen with uint32_t.
+    using Scalar = int8_t;
+    static const int size = sizeof(Native);
+
+    /**
+     * Sets all bytes to 0.
+     */
+    ByteVector() : _data(_mm_setzero_si128()) {}
+
+    /**
+     * Sets all bytes to val.
+     */
+    explicit ByteVector(Scalar val) : _data(_mm_set1_epi8(val)) {}
+
+    /**
+     * Load a vector from a potentially unaligned location.
+     */
+    static ByteVector load(const void* ptr) {
+        // This function is documented as taking an unaligned pointer.
+        return _mm_loadu_si128(reinterpret_cast<const Native*>(ptr));
+    }
+
+    /**
+     * Store this vector to a potentially unaligned location.
+     */
+    void store(void* ptr) const {
+        // This function is documented as taking an unaligned pointer.
+        _mm_storeu_si128(reinterpret_cast<Native*>(ptr), _data);
+    }
+
+    /**
+     * Returns a bitmask with the high bit from each byte.
+     */
+    Mask maskHigh() const {
+        return _mm_movemask_epi8(_data);
+    }
+
+    /**
+     * Returns a bitmask with any bit from each byte.
+     *
+     * This operation only makes sense if all bytes are either 0x00 or 0xff, such as the result from
+     * comparison operations.
+     */
+    Mask maskAny() const {
+        return maskHigh();  // Other archs may be more efficient here.
+    }
+
+    /**
+     * Counts zero bits in mask from whichever side corresponds to the lowest memory address.
+     */
+    static uint32_t countInitialZeros(Mask mask) {
+        return mask == 0 ? size : countTrailingZeros64(mask);
+    }
+
+    /**
+     * Sets each byte to 0xff if it is ==(EQ), <(LT), or >(GT), otherwise 0x00.
+     *
+     * May use either signed or unsigned comparisons since this use case doesn't care about bytes
+     * with high bit set.
+     */
+    ByteVector compareEQ(Scalar val) const {
+        return _mm_cmpeq_epi8(_data, ByteVector(val)._data);
+    }
+    ByteVector compareLT(Scalar val) const {
+        return _mm_cmplt_epi8(_data, ByteVector(val)._data);
+    }
+    ByteVector compareGT(Scalar val) const {
+        return _mm_cmpgt_epi8(_data, ByteVector(val)._data);
+    }
+
+    ByteVector operator|(ByteVector other) const {
+        return _mm_or_si128(_data, other._data);
+    }
+
+    ByteVector& operator|=(ByteVector other) {
+        return (*this = (*this | other));
+    }
+
+    ByteVector operator&(ByteVector other) const {
+        return _mm_and_si128(_data, other._data);
+    }
+
+    ByteVector& operator&=(ByteVector other) {
+        return (*this = (*this & other));
+    }
+
+private:
+    ByteVector(Native data) : _data(data) {}
+
+    Native _data;
+};
+
+}  // namespace unicode
+}  // namespace mongo
diff --git a/src/mongo/db/fts/unicode/byte_vector_test.cpp b/src/mongo/db/fts/unicode/byte_vector_test.cpp
new file mode 100644
index 00000000000..bab382ab8e5
--- /dev/null
+++ b/src/mongo/db/fts/unicode/byte_vector_test.cpp
@@ -0,0 +1,183 @@
+/**
+ *    Copyright (C) 2016 MongoDB Inc.
+ *
+ *    This program is free software: you can redistribute it and/or  modify
+ *    it under the terms of the GNU Affero General Public License, version 3,
+ *    as published by the Free Software Foundation.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU Affero General Public License for more details.
+ *
+ *    You should have received a copy of the GNU Affero General Public License
+ *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the GNU Affero General Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#include <cstring>
+#include <iterator>
+#include <numeric>
+
+#include "mongo/db/fts/unicode/byte_vector.h"
+#include "mongo/unittest/unittest.h"
+
+#ifdef MONGO_HAVE_FAST_BYTE_VECTOR
+namespace mongo {
+namespace unicode {
+
+TEST(ByteVector, LoadStoreUnaligned) {
+    uint8_t inputBuf[ByteVector::size * 2];
+    uint8_t outputBuf[ByteVector::size * 2];
+    std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+    // Try loads and stores at all possible (mis)alignments.
+    for (size_t offset = 0; offset < ByteVector::size; offset++) {
+        std::memset(outputBuf, 0, sizeof(outputBuf));
+        ByteVector::load(inputBuf + offset).store(outputBuf + offset);
+
+        for (size_t i = 0; i < ByteVector::size; i++) {
+            ASSERT_EQ(outputBuf[offset + i], inputBuf[offset + i]);
+        }
+    }
+}
+
+TEST(ByteVector, Splat) {
+    uint8_t outputBuf[ByteVector::size] = {};
+    ByteVector(0x12).store(outputBuf);
+
+    for (size_t i = 0; i < ByteVector::size; i++) {
+        ASSERT_EQ(outputBuf[i], 0x12);
+    }
+}
+
+TEST(ByteVector, MaskAny) {
+    uint8_t inputBuf[ByteVector::size];
+    std::memset(inputBuf, 0xFF, sizeof(inputBuf));
+    for (size_t offset = 0; offset <= ByteVector::size; offset++) {
+        auto mask = ByteVector::load(inputBuf).maskAny();
+        ASSERT_EQ(ByteVector::countInitialZeros(mask), offset);
+        if (offset < ByteVector::size) {
+            inputBuf[offset] = 0;  // Add an initial 0 for the next loop.
+        }
+    }
+}
+
+TEST(ByteVector, MaskHigh) {
+    uint8_t inputBuf[ByteVector::size];
+    std::memset(inputBuf, 0x80, sizeof(inputBuf));
+    for (size_t offset = 0; offset <= ByteVector::size; offset++) {
+        auto mask = ByteVector::load(inputBuf).maskHigh();
+        ASSERT_EQ(ByteVector::countInitialZeros(mask), offset);
+        if (offset < ByteVector::size) {
+            inputBuf[offset] = 0x7f;  // Add an initial 0 bit for the next loop.
+        }
+    }
+}
+
+TEST(ByteVector, CompareEQ) {
+    uint8_t inputBuf[ByteVector::size];
+    uint8_t outputBuf[ByteVector::size] = {};
+    std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+    ByteVector::load(inputBuf).compareEQ(3).store(outputBuf);
+
+    for (size_t i = 0; i < ByteVector::size; i++) {
+        ASSERT_EQ(outputBuf[i], inputBuf[i] == 3 ? 0xFF : 0x00);
+    }
+}
+
+TEST(ByteVector, CompareGT) {
+    uint8_t inputBuf[ByteVector::size];
+    uint8_t outputBuf[ByteVector::size] = {};
+    std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+    ByteVector::load(inputBuf).compareGT(3).store(outputBuf);
+
+    for (size_t i = 0; i < ByteVector::size; i++) {
+        ASSERT_EQ(outputBuf[i], inputBuf[i] > 3 ? 0xFF : 0x00);
+    }
+}
+
+TEST(ByteVector, CompareLT) {
+    uint8_t inputBuf[ByteVector::size];
+    uint8_t outputBuf[ByteVector::size] = {};
+    std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+    ByteVector::load(inputBuf).compareLT(3).store(outputBuf);
+
+    for (size_t i = 0; i < ByteVector::size; i++) {
+        ASSERT_EQ(outputBuf[i], inputBuf[i] < 3 ? 0xFF : 0x00);
+    }
+}
+
+TEST(ByteVector, BitOr) {
+    uint8_t inputBuf[ByteVector::size];
+    uint8_t outputBuf[ByteVector::size] = {};
+    std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+    (ByteVector::load(inputBuf) | ByteVector(0x1)).store(outputBuf);
+
+    for (size_t i = 0; i < ByteVector::size; i++) {
+        ASSERT_EQ(outputBuf[i], inputBuf[i] | 1);
+    }
+}
+
+TEST(ByteVector, BitOrAssign) {
+    uint8_t inputBuf[ByteVector::size];
+    uint8_t outputBuf[ByteVector::size] = {};
+    std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+    auto vec = ByteVector::load(inputBuf);
+    vec |= ByteVector(2);
+    vec.store(outputBuf);
+
+    for (size_t i = 0; i < ByteVector::size; i++) {
+        ASSERT_EQ(outputBuf[i], inputBuf[i] | 2);
+    }
+}
+
+TEST(ByteVector, BitAnd) {
+    uint8_t inputBuf[ByteVector::size];
+    uint8_t outputBuf[ByteVector::size] = {};
+    std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+    (ByteVector::load(inputBuf) & ByteVector(2)).store(outputBuf);
+
+    for (size_t i = 0; i < ByteVector::size; i++) {
+        ASSERT_EQ(outputBuf[i], inputBuf[i] & 2);
+    }
+}
+
+TEST(ByteVector, BitAndAssign) {
+    uint8_t inputBuf[ByteVector::size];
+    uint8_t outputBuf[ByteVector::size] = {};
+    std::iota(std::begin(inputBuf), std::end(inputBuf), 0);
+
+    auto vec = ByteVector::load(inputBuf);
+    vec &= ByteVector(2);
+    vec.store(outputBuf);
+
+    for (size_t i = 0; i < ByteVector::size; i++) {
+        ASSERT_EQ(outputBuf[i], inputBuf[i] & 2);
+    }
+}
+
+}  // namespace unicode
+}  // namespace mongo
+#else
+// Our unittest framework gets angry if there are no tests. If we don't have ByteVector, give it a
+// dummy test to make it happy.
+TEST(ByteVector, ByteVectorNotSupportedOnThisPlatform) {}
+#endif
diff --git a/src/mongo/db/fts/unicode/string.cpp b/src/mongo/db/fts/unicode/string.cpp
index 0ea37d8f33f..483cb7b22eb 100644
--- a/src/mongo/db/fts/unicode/string.cpp
+++ b/src/mongo/db/fts/unicode/string.cpp
@@ -31,6 +31,7 @@
 #include <algorithm>
 #include <boost/algorithm/searching/boyer_moore.hpp>
 
+#include "mongo/db/fts/unicode/byte_vector.h"
 #include "mongo/platform/bits.h"
 #include "mongo/shell/linenoise_utf8.h"
 #include "mongo/util/assert_util.h"
@@ -171,6 +172,46 @@ std::pair<std::unique_ptr<char[]>, char*> String::prepForSubstrMatch(StringData
     auto outputIt = buffer.get();
 
     for (auto inputIt = utf8.begin(), endIt = utf8.end(); inputIt != endIt;) {
+#ifdef MONGO_HAVE_FAST_BYTE_VECTOR
+        if (size_t(endIt - inputIt) >= ByteVector::size) {
+            // Try the fast path for 16 contiguous bytes of ASCII.
+            auto word = ByteVector::load(&*inputIt);
+
+            // Count the bytes of ASCII.
+            uint32_t usableBytes = ByteVector::countInitialZeros(word.maskHigh());
+            if (usableBytes) {
+                if (!(options & kCaseSensitive)) {
+                    if (mode == CaseFoldMode::kTurkish) {
+                        ByteVector::Mask iMask = word.compareEQ('I').maskAny();
+                        if (iMask) {
+                            usableBytes =
+                                std::min(usableBytes, ByteVector::countInitialZeros(iMask));
+                        }
+                    }
+                    // 0xFF for each byte in word that is uppercase, 0x00 for all others.
+                    ByteVector uppercaseMask = word.compareGT('A' - 1) & word.compareLT('Z' + 1);
+                    word |= (uppercaseMask & ByteVector(0x20));  // Set the ascii lowercase bit.
+                }
+
+                if (!(options & kDiacriticSensitive)) {
+                    ByteVector::Mask diacriticMask =
+                        word.compareEQ('^').maskAny() | word.compareEQ('`').maskAny();
+                    if (diacriticMask) {
+                        usableBytes =
+                            std::min(usableBytes, ByteVector::countInitialZeros(diacriticMask));
+                    }
+                }
+
+                word.store(&*outputIt);
+                outputIt += usableBytes;
+                inputIt += usableBytes;
+                if (usableBytes == ByteVector::size)
+                    continue;
+            }
+            // If we get here, inputIt is positioned on a byte that we know needs special handling.
+            // Either it isn't ASCII or it is a diacritic that needs to be stripped.
+        }
+#endif
         const uint8_t firstByte = *inputIt++;
         char32_t codepoint = 0;
         if (firstByte <= 0x7f) {
author	Mathias Stearn <mathias@10gen.com>	2016-02-24 19:55:22 -0500
committer	Mathias Stearn <mathias@10gen.com>	2016-03-11 08:50:18 -0500
commit	67eee08bb606537df7417670d423c0527dd6221f (patch)
tree	cf3cde1244e74fac2950edd2c9f636947ea1d15f
parent	6c3157f126bb44ab275325e85de7abee5ce9ad6d (diff)
download	mongo-67eee08bb606537df7417670d423c0527dd6221f.tar.gz