summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/mongo/db/exec/sbe/sbe_mkobj_test.cpp2
-rw-r--r--src/mongo/db/exec/sbe/sbe_test.cpp48
-rw-r--r--src/mongo/db/exec/sbe/stages/bson_scan.cpp2
-rw-r--r--src/mongo/db/exec/sbe/stages/check_bounds.cpp12
-rw-r--r--src/mongo/db/exec/sbe/stages/check_bounds.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/makeobj.cpp2
-rw-r--r--src/mongo/db/exec/sbe/stages/scan.cpp4
-rw-r--r--src/mongo/db/exec/sbe/stages/sort.cpp1
-rw-r--r--src/mongo/db/exec/sbe/values/bson.cpp38
-rw-r--r--src/mongo/db/exec/sbe/values/bson.h4
-rw-r--r--src/mongo/db/exec/sbe/values/value.cpp4
-rw-r--r--src/mongo/db/exec/sbe/values/value.h54
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.cpp6
-rw-r--r--src/mongo/db/query/index_bounds.cpp21
-rw-r--r--src/mongo/db/query/index_bounds.h2
-rw-r--r--src/mongo/db/query/sbe_stage_builder_filter.cpp12
-rw-r--r--src/mongo/db/query/sbe_stage_builder_helpers.cpp2
-rw-r--r--src/mongo/db/sorter/sorter.cpp11
-rw-r--r--src/mongo/db/sorter/sorter.h15
-rw-r--r--src/mongo/db/storage/key_string.cpp12
-rw-r--r--src/mongo/db/storage/key_string.h2
-rw-r--r--src/mongo/platform/bits.h69
22 files changed, 275 insertions, 51 deletions
diff --git a/src/mongo/db/exec/sbe/sbe_mkobj_test.cpp b/src/mongo/db/exec/sbe/sbe_mkobj_test.cpp
index e107643e51c..644d2df4d4b 100644
--- a/src/mongo/db/exec/sbe/sbe_mkobj_test.cpp
+++ b/src/mongo/db/exec/sbe/sbe_mkobj_test.cpp
@@ -51,7 +51,7 @@ public:
const char* end = be + obj.objsize();
while (*be != 0) {
auto sv = bson::fieldNameView(be);
- auto [tag, val] = bson::convertFrom(false, be, end, sv.size());
+ auto [tag, val] = bson::convertFrom<false>(be, end, sv.size());
be = bson::advance(be, sv.size());
objView->push_back(sv, tag, val);
diff --git a/src/mongo/db/exec/sbe/sbe_test.cpp b/src/mongo/db/exec/sbe/sbe_test.cpp
index ad7529e96bd..ced20818106 100644
--- a/src/mongo/db/exec/sbe/sbe_test.cpp
+++ b/src/mongo/db/exec/sbe/sbe_test.cpp
@@ -401,4 +401,52 @@ TEST(SBEVM, ConvertBinDataToBsonObj) {
ASSERT_EQ(originalBinData.woCompare(convertedBinData), 0);
}
+
+namespace {
+
+/**
+ * Fills bytes after the null terminator in the string with 'pattern'.
+ *
+ * We use this function in the tests to ensure that the implementation of 'getStringLength' for
+ * 'StringSmall' type does not rely on the fact that all bytes after null terminator are zero or any
+ * other special value.
+ */
+void fillSmallStringTail(value::Value val, char pattern) {
+ char* rawView = value::getRawStringView(value::TypeTags::StringSmall, val);
+ for (auto i = std::strlen(rawView) + 1; i <= value::kSmallStringMaxLength; i++) {
+ rawView[i] = pattern;
+ }
+}
+} // namespace
+
+TEST(SBESmallString, Length) {
+ std::vector<std::pair<std::string, size_t>> testCases{
+ {"", 0},
+ {"a", 1},
+ {"ab", 2},
+ {"abc", 3},
+ {"abcd", 4},
+ {"abcde", 5},
+ {"abcdef", 6},
+ {"abcdefh", 7},
+ };
+
+ for (const auto& [string, length] : testCases) {
+ ASSERT(value::canUseSmallString(string));
+ auto [tag, val] = value::makeSmallString(string);
+ ASSERT_EQ(tag, value::TypeTags::StringSmall);
+
+ fillSmallStringTail(val, char(0));
+ ASSERT_EQ(length, value::getStringLength(tag, val));
+
+ fillSmallStringTail(val, char(1));
+ ASSERT_EQ(length, value::getStringLength(tag, val));
+
+ fillSmallStringTail(val, char(1 << 7));
+ ASSERT_EQ(length, value::getStringLength(tag, val));
+
+ fillSmallStringTail(val, ~char(0));
+ ASSERT_EQ(length, value::getStringLength(tag, val));
+ }
+}
} // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/stages/bson_scan.cpp b/src/mongo/db/exec/sbe/stages/bson_scan.cpp
index 430977f3ac4..2599cf0bae8 100644
--- a/src/mongo/db/exec/sbe/stages/bson_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/bson_scan.cpp
@@ -109,7 +109,7 @@ PlanState BSONScanStage::getNext() {
auto sv = bson::fieldNameView(be);
if (auto it = _fieldAccessors.find(sv); it != _fieldAccessors.end()) {
// Found the field so convert it to Value.
- auto [tag, val] = bson::convertFrom(true, be, end, sv.size());
+ auto [tag, val] = bson::convertFrom<true>(be, end, sv.size());
it->second->reset(tag, val);
diff --git a/src/mongo/db/exec/sbe/stages/check_bounds.cpp b/src/mongo/db/exec/sbe/stages/check_bounds.cpp
index b80e197cb10..2bc86c2ccd5 100644
--- a/src/mongo/db/exec/sbe/stages/check_bounds.cpp
+++ b/src/mongo/db/exec/sbe/stages/check_bounds.cpp
@@ -95,10 +95,14 @@ PlanState CheckBoundsStage::getNext() {
keyTag == value::TypeTags::ksValue);
auto key = value::getKeyStringView(keyVal);
- auto bsonKey = KeyString::toBson(*key, _params.ord);
- IndexSeekPoint seekPoint;
- switch (_checker.checkKey(bsonKey, &seekPoint)) {
+ _keyBuffer.reset();
+ BSONObjBuilder keyBuilder(_keyBuffer);
+ KeyString::toBsonSafe(
+ key->getBuffer(), key->getSize(), _params.ord, key->getTypeBits(), keyBuilder);
+ auto bsonKey = keyBuilder.done();
+
+ switch (_checker.checkKey(bsonKey, &_seekPoint)) {
case IndexBoundsChecker::VALID: {
auto [tag, val] = _inRecordIdAccessor->getViewOfValue();
_outAccessor.reset(false, tag, val);
@@ -112,7 +116,7 @@ PlanState CheckBoundsStage::getNext() {
case IndexBoundsChecker::MUST_ADVANCE: {
auto seekKey = std::make_unique<KeyString::Value>(
IndexEntryComparison::makeKeyStringFromSeekPointForSeek(
- seekPoint, _params.version, _params.ord, _params.direction == 1));
+ _seekPoint, _params.version, _params.ord, _params.direction == 1));
_outAccessor.reset(true,
value::TypeTags::ksValue,
value::bitcastFrom<KeyString::Value*>(seekKey.release()));
diff --git a/src/mongo/db/exec/sbe/stages/check_bounds.h b/src/mongo/db/exec/sbe/stages/check_bounds.h
index c4ccdf7d37e..ae9a4bb3171 100644
--- a/src/mongo/db/exec/sbe/stages/check_bounds.h
+++ b/src/mongo/db/exec/sbe/stages/check_bounds.h
@@ -101,5 +101,8 @@ private:
bool _isEOF{false};
CheckBoundsStats _specificStats;
+
+ BufBuilder _keyBuffer;
+ IndexSeekPoint _seekPoint;
};
} // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/stages/makeobj.cpp b/src/mongo/db/exec/sbe/stages/makeobj.cpp
index ce571bd47d7..dc7f79a7159 100644
--- a/src/mongo/db/exec/sbe/stages/makeobj.cpp
+++ b/src/mongo/db/exec/sbe/stages/makeobj.cpp
@@ -173,7 +173,7 @@ void MakeObjStageBase<MakeObjOutputType::object>::produceObject() {
}
if (!projected && !isFieldRestricted(key)) {
- auto [tag, val] = bson::convertFrom(true, be, end, sv.size());
+ auto [tag, val] = bson::convertFrom<true>(be, end, sv.size());
auto [copyTag, copyVal] = value::copyValue(tag, val);
obj->push_back(sv, copyTag, copyVal);
--nFieldsNeededIfInclusion;
diff --git a/src/mongo/db/exec/sbe/stages/scan.cpp b/src/mongo/db/exec/sbe/stages/scan.cpp
index 791fe61e5f8..19080f474f9 100644
--- a/src/mongo/db/exec/sbe/stages/scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/scan.cpp
@@ -338,7 +338,7 @@ PlanState ScanStage::getNext() {
auto sv = bson::fieldNameView(be);
if (auto it = _fieldAccessors.find(sv); it != _fieldAccessors.end()) {
// Found the field so convert it to Value.
- auto [tag, val] = bson::convertFrom(true, be, end, sv.size());
+ auto [tag, val] = bson::convertFrom<true>(be, end, sv.size());
if (_oplogTsAccessor && it->first == repl::OpTime::kTimestampFieldName) {
auto&& [ownedTag, ownedVal] = value::copyValue(tag, val);
@@ -798,7 +798,7 @@ PlanState ParallelScanStage::getNext() {
auto sv = bson::fieldNameView(be);
if (auto it = _fieldAccessors.find(sv); it != _fieldAccessors.end()) {
// Found the field so convert it to Value.
- auto [tag, val] = bson::convertFrom(true, be, end, sv.size());
+ auto [tag, val] = bson::convertFrom<true>(be, end, sv.size());
it->second->reset(false, tag, val);
diff --git a/src/mongo/db/exec/sbe/stages/sort.cpp b/src/mongo/db/exec/sbe/stages/sort.cpp
index 75490e0db5b..89d4b8b2822 100644
--- a/src/mongo/db/exec/sbe/stages/sort.cpp
+++ b/src/mongo/db/exec/sbe/stages/sort.cpp
@@ -125,6 +125,7 @@ void SortStage::makeSorter() {
opts.extSortAllowed = _allowDiskUse;
opts.limit =
_specificStats.limit != std::numeric_limits<size_t>::max() ? _specificStats.limit : 0;
+ opts.moveSortedDataIntoIterator = true;
auto comp = [&](const SorterData& lhs, const SorterData& rhs) {
auto size = lhs.first.size();
diff --git a/src/mongo/db/exec/sbe/values/bson.cpp b/src/mongo/db/exec/sbe/values/bson.cpp
index 2d5a0cd9bcb..c0dd70eb4d7 100644
--- a/src/mongo/db/exec/sbe/values/bson.cpp
+++ b/src/mongo/db/exec/sbe/values/bson.cpp
@@ -104,8 +104,8 @@ const char* advance(const char* be, size_t fieldNameSize) {
return be;
}
-std::pair<value::TypeTags, value::Value> convertFrom(bool view,
- const char* be,
+template <bool View>
+std::pair<value::TypeTags, value::Value> convertFrom(const char* be,
const char* end,
size_t fieldNameSize) {
auto type = static_cast<BSONType>(static_cast<signed char>(*be));
@@ -118,14 +118,14 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view,
return {value::TypeTags::NumberDouble, value::bitcastFrom<double>(dbl)};
}
case BSONType::NumberDecimal: {
- if (view) {
+ if constexpr (View) {
return {value::TypeTags::NumberDecimal, value::bitcastFrom<const char*>(be)};
}
return value::makeCopyDecimal(value::readDecimal128FromMemory(ConstDataView{be}));
}
case BSONType::String: {
- if (view) {
+ if constexpr (View) {
return {value::TypeTags::bsonString, value::bitcastFrom<const char*>(be)};
}
// len includes trailing zero.
@@ -146,14 +146,14 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view,
}
case BSONType::Symbol: {
auto value = value::bitcastFrom<const char*>(be);
- if (view) {
+ if constexpr (View) {
return {value::TypeTags::bsonSymbol, value};
}
return value::makeNewBsonSymbol(
value::getStringOrSymbolView(value::TypeTags::bsonSymbol, value));
}
case BSONType::BinData: {
- if (view) {
+ if constexpr (View) {
return {value::TypeTags::bsonBinData, value::bitcastFrom<const char*>(be)};
}
@@ -174,7 +174,7 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view,
}
}
case BSONType::Object: {
- if (view) {
+ if constexpr (View) {
return {value::TypeTags::bsonObject, value::bitcastFrom<const char*>(be)};
}
// Skip document length.
@@ -185,7 +185,7 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view,
while (*be != 0) {
auto sv = bson::fieldNameView(be);
- auto [tag, val] = convertFrom(false, be, end, sv.size());
+ auto [tag, val] = convertFrom<false>(be, end, sv.size());
obj->push_back(sv, tag, val);
be = advance(be, sv.size());
@@ -193,7 +193,7 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view,
return {tag, val};
}
case BSONType::Array: {
- if (view) {
+ if constexpr (View) {
return {value::TypeTags::bsonArray, value::bitcastFrom<const char*>(be)};
}
// Skip array length.
@@ -204,7 +204,7 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view,
while (*be != 0) {
auto sv = bson::fieldNameView(be);
- auto [tag, val] = convertFrom(false, be, end, sv.size());
+ auto [tag, val] = convertFrom<false>(be, end, sv.size());
arr->push_back(tag, val);
be = advance(be, sv.size());
@@ -212,7 +212,7 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view,
return {tag, val};
}
case BSONType::jstOID: {
- if (view) {
+ if constexpr (View) {
return {value::TypeTags::bsonObjectId, value::bitcastFrom<const char*>(be)};
}
auto [tag, val] = value::makeNewObjectId();
@@ -247,28 +247,28 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view,
return {value::TypeTags::bsonUndefined, 0};
case BSONType::RegEx: {
auto value = value::bitcastFrom<const char*>(be);
- if (view) {
+ if constexpr (View) {
return {value::TypeTags::bsonRegex, value};
}
return value::makeCopyBsonRegex(value::getBsonRegexView(value));
}
case BSONType::Code: {
auto value = value::bitcastFrom<const char*>(be);
- if (view) {
+ if constexpr (View) {
return {value::TypeTags::bsonJavascript, value};
}
return value::makeCopyBsonJavascript(value::getBsonJavascriptView(value));
}
case BSONType::DBRef: {
auto value = value::bitcastFrom<const char*>(be);
- if (view) {
+ if constexpr (View) {
return {value::TypeTags::bsonDBPointer, value};
}
return value::makeCopyBsonDBPointer(value::getBsonDBPointerView(value));
}
case BSONType::CodeWScope: {
auto value = value::bitcastFrom<const char*>(be);
- if (view) {
+ if constexpr (View) {
return {value::TypeTags::bsonCodeWScope, value};
}
return value::makeCopyBsonCodeWScope(value::getBsonCodeWScopeView(value));
@@ -278,6 +278,14 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view,
}
}
+template std::pair<value::TypeTags, value::Value> convertFrom<false>(const char* be,
+ const char* end,
+ size_t fieldNameSize);
+
+template std::pair<value::TypeTags, value::Value> convertFrom<true>(const char* be,
+ const char* end,
+ size_t fieldNameSize);
+
template <class ArrayBuilder>
void convertToBsonObj(ArrayBuilder& builder, value::ArrayEnumerator arr) {
for (; !arr.atEnd(); arr.advance()) {
diff --git a/src/mongo/db/exec/sbe/values/bson.h b/src/mongo/db/exec/sbe/values/bson.h
index b1dfaa74a8a..73e0cd272e8 100644
--- a/src/mongo/db/exec/sbe/values/bson.h
+++ b/src/mongo/db/exec/sbe/values/bson.h
@@ -35,8 +35,8 @@
namespace mongo {
namespace sbe {
namespace bson {
-std::pair<value::TypeTags, value::Value> convertFrom(bool view,
- const char* be,
+template <bool View>
+std::pair<value::TypeTags, value::Value> convertFrom(const char* be,
const char* end,
size_t fieldNameSize);
diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp
index 26c91f01a4f..58da589319b 100644
--- a/src/mongo/db/exec/sbe/values/value.cpp
+++ b/src/mongo/db/exec/sbe/values/value.cpp
@@ -1137,7 +1137,7 @@ std::pair<TypeTags, Value> ArrayEnumerator::getViewOfValue() const {
return {_iter->first, _iter->second};
} else {
auto sv = bson::fieldNameView(_arrayCurrent);
- return bson::convertFrom(true, _arrayCurrent, _arrayEnd, sv.size());
+ return bson::convertFrom<true>(_arrayCurrent, _arrayEnd, sv.size());
}
}
@@ -1169,7 +1169,7 @@ std::pair<TypeTags, Value> ObjectEnumerator::getViewOfValue() const {
return _object->getAt(_index);
} else {
auto sv = bson::fieldNameView(_objectCurrent);
- return bson::convertFrom(true, _objectCurrent, _objectEnd, sv.size());
+ return bson::convertFrom<true>(_objectCurrent, _objectEnd, sv.size());
}
}
diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h
index a9b891f5567..f5138e8b9f5 100644
--- a/src/mongo/db/exec/sbe/values/value.h
+++ b/src/mongo/db/exec/sbe/values/value.h
@@ -33,6 +33,7 @@
#include <absl/container/flat_hash_set.h>
#include <array>
#include <bitset>
+#include <boost/predef/hardware/simd.h>
#include <cstdint>
#include <ostream>
#include <pcre.h>
@@ -47,7 +48,9 @@
#include "mongo/db/fts/fts_matcher.h"
#include "mongo/db/query/bson_typemask.h"
#include "mongo/db/query/collation/collator_interface.h"
+#include "mongo/platform/bits.h"
#include "mongo/platform/decimal128.h"
+#include "mongo/platform/endian.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/represent_as.h"
@@ -693,7 +696,56 @@ inline const char* getRawStringView(TypeTags tag, const Value& val) noexcept {
*/
inline size_t getStringLength(TypeTags tag, const Value& val) noexcept {
if (tag == TypeTags::StringSmall) {
- return strlen(reinterpret_cast<const char*>(&val));
+ // This path turned out to be very hot in our benchmarks, so we avoid calling 'strlen()' and
+ // use an alternative approach to compute string length.
+ // NOTE: Small string value always contains exactly one zero byte, marking the end of the
+ // string. Bytes after this zero byte can have arbitrary value.
+#if defined(BOOST_HW_SIMD_X86_AVAILABLE) && BOOST_HW_SIMD_X86 >= BOOST_HW_SIMD_X86_SSE2_VERSION
+ // If SSE2 instruction set is available, we use SIMD instructions. There are several steps:
+ // (1) _mm_cvtsi64_si128(val) - Copy string value into the 128-bit register
+ // (2) _mm_cmpeq_epi8 - Make each zero byte equal to 0xFF. Other bytes become zero
+ // (3) _mm_movemask_epi8 - Copy most significant bit of each byte into int
+ // (4) countTrailingZerosNonZeroInt - Get the position of the first trailing bit set
+ static_assert(endian::Order::kNative == endian::Order::kLittle);
+ int ret = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_cvtsi64_si128(val), _mm_setzero_si128()));
+ return countTrailingZerosNonZero32(ret);
+#else
+ // If SSE2 is not available, we use bit magic.
+ const uint64_t magic = 0x7F7F7F7F7F7F7F7FULL;
+
+ // This is based on a trick from following link, which describes how to make an expression
+ // which results in '0' when ALL bytes are non-zero, and results in zero when ANY byte is
+ // zero. Instead of casting this result to bool, we count how many complete 0 bytes there
+ // are in 'ret'. This tells us the length of the string.
+ // https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+
+ // At the end of this, ret will store a value where each byte which was all 0's is now
+ // 10000000 and any byte that was anything non zero is now 0.
+
+ // (1) compute (val & magic). This clears the highest bit in each byte.
+ uint64_t ret = val & magic;
+
+ // (2) add magic to the above expression. The result is that, from overflow,
+ // the high bit will be set if any bit was set in 'v' other than the high bit.
+ ret += magic;
+ // (3) OR this result with v. This ensures that if the high bit in 'v' was set
+ // then the high bit in our result will be set.
+ ret |= val;
+ // (4) OR with magic. This will set any low bits which were not already set. At this point
+ // each byte is either all ones or 01111111.
+ ret |= magic;
+
+ // When we invert this, each byte will either be
+ // all zeros (previously non zero) or 10000000 (previously 0).
+ ret = ~ret;
+
+ // So all we have to do is count how many complete 0 bytes there are from one end.
+ if constexpr (endian::Order::kNative == endian::Order::kLittle) {
+ return (countTrailingZerosNonZero64(ret) >> 3);
+ } else {
+ return (countLeadingZerosNonZero64(ret) >> 3);
+ }
+#endif
} else if (tag == TypeTags::StringBig || tag == TypeTags::bsonString) {
return ConstDataView(getRawPointerView(val)).read<LittleEndian<int32_t>>() - 1;
}
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index 2ca3465b102..ef924ae8972 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -487,7 +487,7 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::getField(value::TypeTa
auto sv = bson::fieldNameView(be);
if (sv == fieldStr) {
- auto [tag, val] = bson::convertFrom(true, be, end, sv.size());
+ auto [tag, val] = bson::convertFrom<true>(be, end, sv.size());
return {false, tag, val};
}
@@ -844,7 +844,7 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinDropFields(Arit
auto sv = bson::fieldNameView(be);
if (restrictFieldsSet.count(sv) == 0) {
- auto [tag, val] = bson::convertFrom(false, be, end, sv.size());
+ auto [tag, val] = bson::convertFrom<false>(be, end, sv.size());
obj->push_back(sv, tag, val);
}
@@ -2562,7 +2562,7 @@ std::pair<value::TypeTags, value::Value> collComparisonKey(value::TypeTags tag,
auto ptr = outputView.objdata();
auto be = ptr + 4;
auto end = ptr + ConstDataView(ptr).read<LittleEndian<uint32_t>>();
- return bson::convertFrom(false, be, end, 0);
+ return bson::convertFrom<false>(be, end, 0);
}
} // namespace
diff --git a/src/mongo/db/query/index_bounds.cpp b/src/mongo/db/query/index_bounds.cpp
index fb638d39727..8703368fcbd 100644
--- a/src/mongo/db/query/index_bounds.cpp
+++ b/src/mongo/db/query/index_bounds.cpp
@@ -491,6 +491,8 @@ IndexBoundsChecker::IndexBoundsChecker(const IndexBounds* bounds,
const BSONObj& keyPattern,
int scanDirection)
: _bounds(bounds), _curInterval(bounds->fields.size(), 0) {
+ _keyValues.resize(_curInterval.size());
+
BSONObjIterator it(keyPattern);
while (it.more()) {
int indexDirection = it.next().number() >= 0 ? 1 : -1;
@@ -588,19 +590,20 @@ IndexBoundsChecker::KeyState IndexBoundsChecker::checkKey(const BSONObj& key, In
out->suffixInclusive.resize(_curInterval.size());
// It's useful later to go from a field number to the value for that field. Store these.
- // TODO: on optimization pass, populate the vector as-needed and keep the vector around as a
- // member variable
- vector<BSONElement> keyValues;
+ size_t i = 0;
BSONObjIterator keyIt(key);
while (keyIt.more()) {
- keyValues.push_back(keyIt.next());
+ verify(i < _curInterval.size());
+
+ _keyValues[i] = keyIt.next();
+ i++;
}
- verify(keyValues.size() == _curInterval.size());
+ verify(i == _curInterval.size());
size_t firstNonContainedField;
Location orientation;
- if (!findLeftmostProblem(keyValues, &firstNonContainedField, &orientation)) {
+ if (!findLeftmostProblem(_keyValues, &firstNonContainedField, &orientation)) {
// All fields in the index are within the current interval. Caller can use the key.
return VALID;
}
@@ -616,7 +619,7 @@ IndexBoundsChecker::KeyState IndexBoundsChecker::checkKey(const BSONObj& key, In
// ...and try again. This call modifies 'orientation', so we may check its value again
// in the clause below if field number 'firstNonContainedField' isn't in its first
// interval.
- if (!findLeftmostProblem(keyValues, &firstNonContainedField, &orientation)) {
+ if (!findLeftmostProblem(_keyValues, &firstNonContainedField, &orientation)) {
return VALID;
}
}
@@ -646,7 +649,7 @@ IndexBoundsChecker::KeyState IndexBoundsChecker::checkKey(const BSONObj& key, In
// Find the interval that contains our field.
size_t newIntervalForField;
- Location where = findIntervalForField(keyValues[firstNonContainedField],
+ Location where = findIntervalForField(_keyValues[firstNonContainedField],
_bounds->fields[firstNonContainedField],
_expectedDirection[firstNonContainedField],
&newIntervalForField);
@@ -686,7 +689,7 @@ IndexBoundsChecker::KeyState IndexBoundsChecker::checkKey(const BSONObj& key, In
// If all fields to the left have hit the end of their intervals, we can't ask them
// to move forward and we should stop iterating.
- if (!spaceLeftToAdvance(firstNonContainedField, keyValues)) {
+ if (!spaceLeftToAdvance(firstNonContainedField, _keyValues)) {
return DONE;
}
diff --git a/src/mongo/db/query/index_bounds.h b/src/mongo/db/query/index_bounds.h
index c1964415edd..47b66ffe697 100644
--- a/src/mongo/db/query/index_bounds.h
+++ b/src/mongo/db/query/index_bounds.h
@@ -299,6 +299,8 @@ private:
// Direction of scan * direction of indexing.
std::vector<int> _expectedDirection;
+
+ std::vector<BSONElement> _keyValues;
};
} // namespace mongo
diff --git a/src/mongo/db/query/sbe_stage_builder_filter.cpp b/src/mongo/db/query/sbe_stage_builder_filter.cpp
index c22920e6616..0cdccc4323a 100644
--- a/src/mongo/db/query/sbe_stage_builder_filter.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_filter.cpp
@@ -696,8 +696,8 @@ void generateComparison(MatchExpressionVisitorContext* context,
auto makePredicate = [context, expr, binaryOp](sbe::value::SlotId inputSlot,
EvalStage inputStage) -> EvalExprStagePair {
const auto& rhs = expr->getData();
- auto [tagView, valView] = sbe::bson::convertFrom(
- true, rhs.rawdata(), rhs.rawdata() + rhs.size(), rhs.fieldNameSize() - 1);
+ auto [tagView, valView] = sbe::bson::convertFrom<true>(
+ rhs.rawdata(), rhs.rawdata() + rhs.size(), rhs.fieldNameSize() - 1);
// Most commonly the comparison does not do any kind of type conversions (i.e. 12 > "10"
// does not evaluate to true as we do not try to convert a string to a number). Internally,
@@ -1439,10 +1439,10 @@ public:
auto hasArray = false;
auto hasNull = false;
for (auto&& equality : equalities) {
- auto [tagView, valView] = sbe::bson::convertFrom(true,
- equality.rawdata(),
- equality.rawdata() + equality.size(),
- equality.fieldNameSize() - 1);
+ auto [tagView, valView] =
+ sbe::bson::convertFrom<true>(equality.rawdata(),
+ equality.rawdata() + equality.size(),
+ equality.fieldNameSize() - 1);
hasNull |= tagView == sbe::value::TypeTags::Null;
hasArray |= sbe::value::isArray(tagView);
diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.cpp b/src/mongo/db/query/sbe_stage_builder_helpers.cpp
index 75db3c914fd..c340a31979c 100644
--- a/src/mongo/db/query/sbe_stage_builder_helpers.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_helpers.cpp
@@ -645,7 +645,7 @@ std::pair<sbe::value::TypeTags, sbe::value::Value> makeValue(const Value& val) {
auto obj = bob.done();
auto be = obj.objdata();
auto end = be + obj.objsize();
- return sbe::bson::convertFrom(false, be + 4, end, 0);
+ return sbe::bson::convertFrom<false>(be + 4, end, 0);
}
uint32_t dateTypeMask() {
diff --git a/src/mongo/db/sorter/sorter.cpp b/src/mongo/db/sorter/sorter.cpp
index 4a0aa54156f..330b3b6cc1f 100644
--- a/src/mongo/db/sorter/sorter.cpp
+++ b/src/mongo/db/sorter/sorter.cpp
@@ -159,6 +159,8 @@ public:
template <typename Container>
InMemIterator(const Container& input) : _data(input.begin(), input.end()) {}
+ InMemIterator(std::deque<Data> data) : _data(std::move(data)) {}
+
void openSource() {}
void closeSource() {}
@@ -622,6 +624,9 @@ public:
if (this->_iters.empty()) {
sort();
+ if (this->_opts.moveSortedDataIntoIterator) {
+ return new InMemIterator<Key, Value>(std::move(_data));
+ }
return new InMemIterator<Key, Value>(_data);
}
@@ -717,6 +722,9 @@ public:
Iterator* done() {
if (_haveData) {
+ if (this->_opts.moveSortedDataIntoIterator) {
+ return new InMemIterator<Key, Value>(std::move(_best));
+ }
return new InMemIterator<Key, Value>(_best);
} else {
return new InMemIterator<Key, Value>();
@@ -823,6 +831,9 @@ public:
Iterator* done() {
if (this->_iters.empty()) {
sort();
+ if (this->_opts.moveSortedDataIntoIterator) {
+ return new InMemIterator<Key, Value>(std::move(_data));
+ }
return new InMemIterator<Key, Value>(_data);
}
diff --git a/src/mongo/db/sorter/sorter.h b/src/mongo/db/sorter/sorter.h
index 782d0389f57..423638d7a8a 100644
--- a/src/mongo/db/sorter/sorter.h
+++ b/src/mongo/db/sorter/sorter.h
@@ -114,7 +114,15 @@ struct SortOptions {
// extSortAllowed is true.
std::string tempDir;
- SortOptions() : limit(0), maxMemoryUsageBytes(64 * 1024 * 1024), extSortAllowed(false) {}
+ // If set to true and sorted data fits into memory, sorted data will be moved into iterator
+ // instead of copying.
+ bool moveSortedDataIntoIterator;
+
+ SortOptions()
+ : limit(0),
+ maxMemoryUsageBytes(64 * 1024 * 1024),
+ extSortAllowed(false),
+ moveSortedDataIntoIterator(false) {}
// Fluent API to support expressions like SortOptions().Limit(1000).ExtSortAllowed(true)
@@ -142,6 +150,11 @@ struct SortOptions {
dbName = std::move(newDbName);
return *this;
}
+
+ SortOptions& MoveSortedDataIntoIterator(bool newMoveSortedDataIntoIterator = true) {
+ moveSortedDataIntoIterator = newMoveSortedDataIntoIterator;
+ return *this;
+ }
};
/**
diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp
index f5cb04f3b20..fd79e6a9ada 100644
--- a/src/mongo/db/storage/key_string.cpp
+++ b/src/mongo/db/storage/key_string.cpp
@@ -2443,8 +2443,11 @@ Discriminator decodeDiscriminator(const char* buffer,
return Discriminator::kInclusive;
}
-BSONObj toBsonSafe(const char* buffer, size_t len, Ordering ord, const TypeBits& typeBits) {
- BSONObjBuilder builder;
+void toBsonSafe(const char* buffer,
+ size_t len,
+ Ordering ord,
+ const TypeBits& typeBits,
+ BSONObjBuilder& builder) {
BufReader reader(buffer, len);
TypeBits::Reader typeBitsReader(typeBits);
for (int i = 0; reader.remaining(); i++) {
@@ -2462,6 +2465,11 @@ BSONObj toBsonSafe(const char* buffer, size_t len, Ordering ord, const TypeBits&
break;
toBsonValue(ctype, &reader, &typeBitsReader, invert, typeBits.version, &(builder << ""), 1);
}
+}
+
+BSONObj toBsonSafe(const char* buffer, size_t len, Ordering ord, const TypeBits& typeBits) {
+ BSONObjBuilder builder;
+ toBsonSafe(buffer, len, ord, typeBits, builder);
return builder.obj();
}
diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h
index 57d16141241..68fc1cdeb53 100644
--- a/src/mongo/db/storage/key_string.h
+++ b/src/mongo/db/storage/key_string.h
@@ -933,6 +933,8 @@ size_t getKeySize(const char* buffer, size_t len, Ordering ord, const TypeBits&
BSONObj toBson(StringData data, Ordering ord, const TypeBits& types);
BSONObj toBson(const char* buffer, size_t len, Ordering ord, const TypeBits& types) noexcept;
BSONObj toBsonSafe(const char* buffer, size_t len, Ordering ord, const TypeBits& types);
+void toBsonSafe(
+ const char* buffer, size_t len, Ordering ord, const TypeBits& types, BSONObjBuilder& builder);
Discriminator decodeDiscriminator(const char* buffer,
size_t len,
Ordering ord,
diff --git a/src/mongo/platform/bits.h b/src/mongo/platform/bits.h
index b12bda75b3e..606ad9d830e 100644
--- a/src/mongo/platform/bits.h
+++ b/src/mongo/platform/bits.h
@@ -41,10 +41,24 @@ namespace mongo {
inline int countLeadingZeros64(unsigned long long num);
/**
+ * Returns the number of leading 0-bits in num. The result is undefined if num is 0.
+ */
+inline int countLeadingZerosNonZero64(unsigned long long num);
+
+/**
* Returns the number of trailing 0-bits in num. Returns 64 if num is 0.
*/
inline int countTrailingZeros64(unsigned long long num);
+/**
+ * Returns the number of trailing 0-bits in num. The result is undefined if num is 0.
+ */
+inline int countTrailingZerosNonZero64(unsigned long long num);
+
+/**
+ * Same as above, but for int.
+ */
+inline int countTrailingZerosNonZero32(unsigned int num);
#if defined(__GNUC__)
int countLeadingZeros64(unsigned long long num) {
@@ -53,11 +67,24 @@ int countLeadingZeros64(unsigned long long num) {
return __builtin_clzll(num);
}
+int countLeadingZerosNonZero64(unsigned long long num) {
+ return __builtin_clzll(num);
+}
+
int countTrailingZeros64(unsigned long long num) {
if (num == 0)
return 64;
return __builtin_ctzll(num);
}
+
+int countTrailingZerosNonZero64(unsigned long long num) {
+ return __builtin_ctzll(num);
+}
+
+int countTrailingZerosNonZero32(unsigned int num) {
+ return __builtin_ctz(num);
+}
+
#elif defined(_MSC_VER) && defined(_WIN64)
int countLeadingZeros64(unsigned long long num) {
unsigned long out;
@@ -66,12 +93,31 @@ int countLeadingZeros64(unsigned long long num) {
return 64;
}
+int countLeadingZerosNonZero64(unsigned long long num) {
+ unsigned long out;
+ _BitScanReverse64(&out, num);
+ return 63 ^ out;
+}
+
int countTrailingZeros64(unsigned long long num) {
unsigned long out;
if (_BitScanForward64(&out, num))
return out;
return 64;
}
+
+int countTrailingZerosNonZero64(unsigned long long num) {
+ unsigned long out;
+ _BitScanForward64(&out, num);
+ return out;
+}
+
+int countTrailingZerosNonZero32(unsigned int num) {
+ unsigned long out;
+ _BitScanForward(&out, static_cast<unsigned long>(num));
+ return out;
+}
+
#elif defined(_MSC_VER) && defined(_WIN32)
int countLeadingZeros64(unsigned long long num) {
unsigned long out;
@@ -82,6 +128,14 @@ int countLeadingZeros64(unsigned long long num) {
return 64;
}
+int countLeadingZerosNonZero64(unsigned long long num) {
+ unsigned long out;
+ if (_BitScanReverse(&out, static_cast<unsigned long>(num >> 32)))
+ return 31 ^ out;
+ _BitScanReverse(&out, static_cast<unsigned long>(num));
+ return 63 ^ out;
+}
+
int countTrailingZeros64(unsigned long long num) {
unsigned long out;
if (_BitScanForward(&out, static_cast<unsigned long>(num)))
@@ -90,6 +144,21 @@ int countTrailingZeros64(unsigned long long num) {
return out + 32;
return 64;
}
+
+int countTrailingZerosNonZero64(unsigned long long num) {
+ unsigned long out;
+ if (_BitScanForward(&out, static_cast<unsigned long>(num)))
+ return out;
+
+ _BitScanForward(&out, static_cast<unsigned long>(num >> 32));
+ return out + 32;
+}
+
+int countTrailingZerosNonZero32(unsigned int num) {
+ unsigned long out;
+ _BitScanForward(&out, static_cast<unsigned long>(num));
+ return out;
+}
#else
#error "No bit-ops definitions for your platform"
#endif