diff options
author | Mathias Stearn <mathias@10gen.com> | 2016-02-24 19:55:22 -0500 |
---|---|---|
committer | Mathias Stearn <mathias@10gen.com> | 2016-03-11 08:50:18 -0500 |
commit | 67eee08bb606537df7417670d423c0527dd6221f (patch) | |
tree | cf3cde1244e74fac2950edd2c9f636947ea1d15f | |
parent | 6c3157f126bb44ab275325e85de7abee5ce9ad6d (diff) | |
download | mongo-67eee08bb606537df7417670d423c0527dd6221f.tar.gz |
SERVER-19936 Vector-optimize FTS phrase matches
Now handles up to 16 bytes of ASCII at a time if SSE2 is enabled.
-rw-r--r-- | src/mongo/db/fts/unicode/SConscript | 9 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/byte_vector.h | 38 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/byte_vector_sse2.h | 141 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/byte_vector_test.cpp | 183 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/string.cpp | 41 |
5 files changed, 412 insertions, 0 deletions
diff --git a/src/mongo/db/fts/unicode/SConscript b/src/mongo/db/fts/unicode/SConscript index e4dfdda8171..dc9980ec50d 100644 --- a/src/mongo/db/fts/unicode/SConscript +++ b/src/mongo/db/fts/unicode/SConscript @@ -69,3 +69,12 @@ env.CppUnitTest( 'unicode', ] ) + +env.CppUnitTest( + target='byte_vector_test', + source=[ + 'byte_vector_test.cpp' + ], + LIBDEPS=[ + ] +) diff --git a/src/mongo/db/fts/unicode/byte_vector.h b/src/mongo/db/fts/unicode/byte_vector.h new file mode 100644 index 00000000000..d361a857ff1 --- /dev/null +++ b/src/mongo/db/fts/unicode/byte_vector.h @@ -0,0 +1,38 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#define MONGO_HAVE_FAST_BYTE_VECTOR + +// TODO replace this with #if BOOST_HW_SIMD_X86 >= BOOST_HW_SIMD_X86_SSE2_VERSION in boost 1.60 +#if defined(_M_AMD64) || defined(__amd64__) +#include "mongo/db/fts/unicode/byte_vector_sse2.h" +#else // Other platforms go above here. +#undef MONGO_HAVE_FAST_BYTE_VECTOR +#endif diff --git a/src/mongo/db/fts/unicode/byte_vector_sse2.h b/src/mongo/db/fts/unicode/byte_vector_sse2.h new file mode 100644 index 00000000000..6262488085a --- /dev/null +++ b/src/mongo/db/fts/unicode/byte_vector_sse2.h @@ -0,0 +1,141 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <cstdint> +#include <emmintrin.h> + +#include "mongo/platform/bits.h" + +namespace mongo { +namespace unicode { + +/** + * A sequence of bytes that can be manipulated using vectorized instructions. + * + * This is specific to the use case in mongo::unicode::String and not intended as a general purpose + * vector class. + */ +class ByteVector { +public: + using Native = __m128i; + using Mask = uint32_t; // should be uint16_t but better codegen with uint32_t. + using Scalar = int8_t; + static const int size = sizeof(Native); + + /** + * Sets all bytes to 0. + */ + ByteVector() : _data(_mm_setzero_si128()) {} + + /** + * Sets all bytes to val. + */ + explicit ByteVector(Scalar val) : _data(_mm_set1_epi8(val)) {} + + /** + * Load a vector from a potentially unaligned location. + */ + static ByteVector load(const void* ptr) { + // This function is documented as taking an unaligned pointer. + return _mm_loadu_si128(reinterpret_cast<const Native*>(ptr)); + } + + /** + * Store this vector to a potentially unaligned location. + */ + void store(void* ptr) const { + // This function is documented as taking an unaligned pointer. + _mm_storeu_si128(reinterpret_cast<Native*>(ptr), _data); + } + + /** + * Returns a bitmask with the high bit from each byte. + */ + Mask maskHigh() const { + return _mm_movemask_epi8(_data); + } + + /** + * Returns a bitmask with any bit from each byte. + * + * This operation only makes sense if all bytes are either 0x00 or 0xff, such as the result from + * comparison operations. + */ + Mask maskAny() const { + return maskHigh(); // Other archs may be more efficient here. + } + + /** + * Counts zero bits in mask from whichever side corresponds to the lowest memory address. + */ + static uint32_t countInitialZeros(Mask mask) { + return mask == 0 ? size : countTrailingZeros64(mask); + } + + /** + * Sets each byte to 0xff if it is ==(EQ), <(LT), or >(GT), otherwise 0x00. + * + * May use either signed or unsigned comparisons since this use case doesn't care about bytes + * with high bit set. + */ + ByteVector compareEQ(Scalar val) const { + return _mm_cmpeq_epi8(_data, ByteVector(val)._data); + } + ByteVector compareLT(Scalar val) const { + return _mm_cmplt_epi8(_data, ByteVector(val)._data); + } + ByteVector compareGT(Scalar val) const { + return _mm_cmpgt_epi8(_data, ByteVector(val)._data); + } + + ByteVector operator|(ByteVector other) const { + return _mm_or_si128(_data, other._data); + } + + ByteVector& operator|=(ByteVector other) { + return (*this = (*this | other)); + } + + ByteVector operator&(ByteVector other) const { + return _mm_and_si128(_data, other._data); + } + + ByteVector& operator&=(ByteVector other) { + return (*this = (*this & other)); + } + +private: + ByteVector(Native data) : _data(data) {} + + Native _data; +}; + +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/db/fts/unicode/byte_vector_test.cpp b/src/mongo/db/fts/unicode/byte_vector_test.cpp new file mode 100644 index 00000000000..bab382ab8e5 --- /dev/null +++ b/src/mongo/db/fts/unicode/byte_vector_test.cpp @@ -0,0 +1,183 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include <cstring> +#include <iterator> +#include <numeric> + +#include "mongo/db/fts/unicode/byte_vector.h" +#include "mongo/unittest/unittest.h" + +#ifdef MONGO_HAVE_FAST_BYTE_VECTOR +namespace mongo { +namespace unicode { + +TEST(ByteVector, LoadStoreUnaligned) { + uint8_t inputBuf[ByteVector::size * 2]; + uint8_t outputBuf[ByteVector::size * 2]; + std::iota(std::begin(inputBuf), std::end(inputBuf), 0); + + // Try loads and stores at all possible (mis)alignments. + for (size_t offset = 0; offset < ByteVector::size; offset++) { + std::memset(outputBuf, 0, sizeof(outputBuf)); + ByteVector::load(inputBuf + offset).store(outputBuf + offset); + + for (size_t i = 0; i < ByteVector::size; i++) { + ASSERT_EQ(outputBuf[offset + i], inputBuf[offset + i]); + } + } +} + +TEST(ByteVector, Splat) { + uint8_t outputBuf[ByteVector::size] = {}; + ByteVector(0x12).store(outputBuf); + + for (size_t i = 0; i < ByteVector::size; i++) { + ASSERT_EQ(outputBuf[i], 0x12); + } +} + +TEST(ByteVector, MaskAny) { + uint8_t inputBuf[ByteVector::size]; + std::memset(inputBuf, 0xFF, sizeof(inputBuf)); + for (size_t offset = 0; offset <= ByteVector::size; offset++) { + auto mask = ByteVector::load(inputBuf).maskAny(); + ASSERT_EQ(ByteVector::countInitialZeros(mask), offset); + if (offset < ByteVector::size) { + inputBuf[offset] = 0; // Add an initial 0 for the next loop. + } + } +} + +TEST(ByteVector, MaskHigh) { + uint8_t inputBuf[ByteVector::size]; + std::memset(inputBuf, 0x80, sizeof(inputBuf)); + for (size_t offset = 0; offset <= ByteVector::size; offset++) { + auto mask = ByteVector::load(inputBuf).maskHigh(); + ASSERT_EQ(ByteVector::countInitialZeros(mask), offset); + if (offset < ByteVector::size) { + inputBuf[offset] = 0x7f; // Add an initial 0 bit for the next loop. + } + } +} + +TEST(ByteVector, CompareEQ) { + uint8_t inputBuf[ByteVector::size]; + uint8_t outputBuf[ByteVector::size] = {}; + std::iota(std::begin(inputBuf), std::end(inputBuf), 0); + + ByteVector::load(inputBuf).compareEQ(3).store(outputBuf); + + for (size_t i = 0; i < ByteVector::size; i++) { + ASSERT_EQ(outputBuf[i], inputBuf[i] == 3 ? 0xFF : 0x00); + } +} + +TEST(ByteVector, CompareGT) { + uint8_t inputBuf[ByteVector::size]; + uint8_t outputBuf[ByteVector::size] = {}; + std::iota(std::begin(inputBuf), std::end(inputBuf), 0); + + ByteVector::load(inputBuf).compareGT(3).store(outputBuf); + + for (size_t i = 0; i < ByteVector::size; i++) { + ASSERT_EQ(outputBuf[i], inputBuf[i] > 3 ? 0xFF : 0x00); + } +} + +TEST(ByteVector, CompareLT) { + uint8_t inputBuf[ByteVector::size]; + uint8_t outputBuf[ByteVector::size] = {}; + std::iota(std::begin(inputBuf), std::end(inputBuf), 0); + + ByteVector::load(inputBuf).compareLT(3).store(outputBuf); + + for (size_t i = 0; i < ByteVector::size; i++) { + ASSERT_EQ(outputBuf[i], inputBuf[i] < 3 ? 0xFF : 0x00); + } +} + +TEST(ByteVector, BitOr) { + uint8_t inputBuf[ByteVector::size]; + uint8_t outputBuf[ByteVector::size] = {}; + std::iota(std::begin(inputBuf), std::end(inputBuf), 0); + + (ByteVector::load(inputBuf) | ByteVector(0x1)).store(outputBuf); + + for (size_t i = 0; i < ByteVector::size; i++) { + ASSERT_EQ(outputBuf[i], inputBuf[i] | 1); + } +} + +TEST(ByteVector, BitOrAssign) { + uint8_t inputBuf[ByteVector::size]; + uint8_t outputBuf[ByteVector::size] = {}; + std::iota(std::begin(inputBuf), std::end(inputBuf), 0); + + auto vec = ByteVector::load(inputBuf); + vec |= ByteVector(2); + vec.store(outputBuf); + + for (size_t i = 0; i < ByteVector::size; i++) { + ASSERT_EQ(outputBuf[i], inputBuf[i] | 2); + } +} + +TEST(ByteVector, BitAnd) { + uint8_t inputBuf[ByteVector::size]; + uint8_t outputBuf[ByteVector::size] = {}; + std::iota(std::begin(inputBuf), std::end(inputBuf), 0); + + (ByteVector::load(inputBuf) & ByteVector(2)).store(outputBuf); + + for (size_t i = 0; i < ByteVector::size; i++) { + ASSERT_EQ(outputBuf[i], inputBuf[i] & 2); + } +} + +TEST(ByteVector, BitAndAssign) { + uint8_t inputBuf[ByteVector::size]; + uint8_t outputBuf[ByteVector::size] = {}; + std::iota(std::begin(inputBuf), std::end(inputBuf), 0); + + auto vec = ByteVector::load(inputBuf); + vec &= ByteVector(2); + vec.store(outputBuf); + + for (size_t i = 0; i < ByteVector::size; i++) { + ASSERT_EQ(outputBuf[i], inputBuf[i] & 2); + } +} + +} // namespace unicode +} // namespace mongo +#else +// Our unittest framework gets angry if there are no tests. If we don't have ByteVector, give it a +// dummy test to make it happy. +TEST(ByteVector, ByteVectorNotSupportedOnThisPlatform) {} +#endif diff --git a/src/mongo/db/fts/unicode/string.cpp b/src/mongo/db/fts/unicode/string.cpp index 0ea37d8f33f..483cb7b22eb 100644 --- a/src/mongo/db/fts/unicode/string.cpp +++ b/src/mongo/db/fts/unicode/string.cpp @@ -31,6 +31,7 @@ #include <algorithm> #include <boost/algorithm/searching/boyer_moore.hpp> +#include "mongo/db/fts/unicode/byte_vector.h" #include "mongo/platform/bits.h" #include "mongo/shell/linenoise_utf8.h" #include "mongo/util/assert_util.h" @@ -171,6 +172,46 @@ std::pair<std::unique_ptr<char[]>, char*> String::prepForSubstrMatch(StringData auto outputIt = buffer.get(); for (auto inputIt = utf8.begin(), endIt = utf8.end(); inputIt != endIt;) { +#ifdef MONGO_HAVE_FAST_BYTE_VECTOR + if (size_t(endIt - inputIt) >= ByteVector::size) { + // Try the fast path for 16 contiguous bytes of ASCII. + auto word = ByteVector::load(&*inputIt); + + // Count the bytes of ASCII. + uint32_t usableBytes = ByteVector::countInitialZeros(word.maskHigh()); + if (usableBytes) { + if (!(options & kCaseSensitive)) { + if (mode == CaseFoldMode::kTurkish) { + ByteVector::Mask iMask = word.compareEQ('I').maskAny(); + if (iMask) { + usableBytes = + std::min(usableBytes, ByteVector::countInitialZeros(iMask)); + } + } + // 0xFF for each byte in word that is uppercase, 0x00 for all others. + ByteVector uppercaseMask = word.compareGT('A' - 1) & word.compareLT('Z' + 1); + word |= (uppercaseMask & ByteVector(0x20)); // Set the ascii lowercase bit. + } + + if (!(options & kDiacriticSensitive)) { + ByteVector::Mask diacriticMask = + word.compareEQ('^').maskAny() | word.compareEQ('`').maskAny(); + if (diacriticMask) { + usableBytes = + std::min(usableBytes, ByteVector::countInitialZeros(diacriticMask)); + } + } + + word.store(&*outputIt); + outputIt += usableBytes; + inputIt += usableBytes; + if (usableBytes == ByteVector::size) + continue; + } + // If we get here, inputIt is positioned on a byte that we know needs special handling. + // Either it isn't ASCII or it is a diacritic that needs to be stripped. + } +#endif const uint8_t firstByte = *inputIt++; char32_t codepoint = 0; if (firstByte <= 0x7f) { |