diff options
Diffstat (limited to 'src/mongo')
22 files changed, 275 insertions, 51 deletions
diff --git a/src/mongo/db/exec/sbe/sbe_mkobj_test.cpp b/src/mongo/db/exec/sbe/sbe_mkobj_test.cpp index e107643e51c..644d2df4d4b 100644 --- a/src/mongo/db/exec/sbe/sbe_mkobj_test.cpp +++ b/src/mongo/db/exec/sbe/sbe_mkobj_test.cpp @@ -51,7 +51,7 @@ public: const char* end = be + obj.objsize(); while (*be != 0) { auto sv = bson::fieldNameView(be); - auto [tag, val] = bson::convertFrom(false, be, end, sv.size()); + auto [tag, val] = bson::convertFrom<false>(be, end, sv.size()); be = bson::advance(be, sv.size()); objView->push_back(sv, tag, val); diff --git a/src/mongo/db/exec/sbe/sbe_test.cpp b/src/mongo/db/exec/sbe/sbe_test.cpp index ad7529e96bd..ced20818106 100644 --- a/src/mongo/db/exec/sbe/sbe_test.cpp +++ b/src/mongo/db/exec/sbe/sbe_test.cpp @@ -401,4 +401,52 @@ TEST(SBEVM, ConvertBinDataToBsonObj) { ASSERT_EQ(originalBinData.woCompare(convertedBinData), 0); } + +namespace { + +/** + * Fills bytes after the null terminator in the string with 'pattern'. + * + * We use this function in the tests to ensure that the implementation of 'getStringLength' for + * 'StringSmall' type does not rely on the fact that all bytes after null terminator are zero or any + * other special value. + */ +void fillSmallStringTail(value::Value val, char pattern) { + char* rawView = value::getRawStringView(value::TypeTags::StringSmall, val); + for (auto i = std::strlen(rawView) + 1; i <= value::kSmallStringMaxLength; i++) { + rawView[i] = pattern; + } +} +} // namespace + +TEST(SBESmallString, Length) { + std::vector<std::pair<std::string, size_t>> testCases{ + {"", 0}, + {"a", 1}, + {"ab", 2}, + {"abc", 3}, + {"abcd", 4}, + {"abcde", 5}, + {"abcdef", 6}, + {"abcdefh", 7}, + }; + + for (const auto& [string, length] : testCases) { + ASSERT(value::canUseSmallString(string)); + auto [tag, val] = value::makeSmallString(string); + ASSERT_EQ(tag, value::TypeTags::StringSmall); + + fillSmallStringTail(val, char(0)); + ASSERT_EQ(length, value::getStringLength(tag, val)); + + fillSmallStringTail(val, char(1)); + ASSERT_EQ(length, value::getStringLength(tag, val)); + + fillSmallStringTail(val, char(1 << 7)); + ASSERT_EQ(length, value::getStringLength(tag, val)); + + fillSmallStringTail(val, ~char(0)); + ASSERT_EQ(length, value::getStringLength(tag, val)); + } +} } // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/stages/bson_scan.cpp b/src/mongo/db/exec/sbe/stages/bson_scan.cpp index 430977f3ac4..2599cf0bae8 100644 --- a/src/mongo/db/exec/sbe/stages/bson_scan.cpp +++ b/src/mongo/db/exec/sbe/stages/bson_scan.cpp @@ -109,7 +109,7 @@ PlanState BSONScanStage::getNext() { auto sv = bson::fieldNameView(be); if (auto it = _fieldAccessors.find(sv); it != _fieldAccessors.end()) { // Found the field so convert it to Value. - auto [tag, val] = bson::convertFrom(true, be, end, sv.size()); + auto [tag, val] = bson::convertFrom<true>(be, end, sv.size()); it->second->reset(tag, val); diff --git a/src/mongo/db/exec/sbe/stages/check_bounds.cpp b/src/mongo/db/exec/sbe/stages/check_bounds.cpp index b80e197cb10..2bc86c2ccd5 100644 --- a/src/mongo/db/exec/sbe/stages/check_bounds.cpp +++ b/src/mongo/db/exec/sbe/stages/check_bounds.cpp @@ -95,10 +95,14 @@ PlanState CheckBoundsStage::getNext() { keyTag == value::TypeTags::ksValue); auto key = value::getKeyStringView(keyVal); - auto bsonKey = KeyString::toBson(*key, _params.ord); - IndexSeekPoint seekPoint; - switch (_checker.checkKey(bsonKey, &seekPoint)) { + _keyBuffer.reset(); + BSONObjBuilder keyBuilder(_keyBuffer); + KeyString::toBsonSafe( + key->getBuffer(), key->getSize(), _params.ord, key->getTypeBits(), keyBuilder); + auto bsonKey = keyBuilder.done(); + + switch (_checker.checkKey(bsonKey, &_seekPoint)) { case IndexBoundsChecker::VALID: { auto [tag, val] = _inRecordIdAccessor->getViewOfValue(); _outAccessor.reset(false, tag, val); @@ -112,7 +116,7 @@ PlanState CheckBoundsStage::getNext() { case IndexBoundsChecker::MUST_ADVANCE: { auto seekKey = std::make_unique<KeyString::Value>( IndexEntryComparison::makeKeyStringFromSeekPointForSeek( - seekPoint, _params.version, _params.ord, _params.direction == 1)); + _seekPoint, _params.version, _params.ord, _params.direction == 1)); _outAccessor.reset(true, value::TypeTags::ksValue, value::bitcastFrom<KeyString::Value*>(seekKey.release())); diff --git a/src/mongo/db/exec/sbe/stages/check_bounds.h b/src/mongo/db/exec/sbe/stages/check_bounds.h index c4ccdf7d37e..ae9a4bb3171 100644 --- a/src/mongo/db/exec/sbe/stages/check_bounds.h +++ b/src/mongo/db/exec/sbe/stages/check_bounds.h @@ -101,5 +101,8 @@ private: bool _isEOF{false}; CheckBoundsStats _specificStats; + + BufBuilder _keyBuffer; + IndexSeekPoint _seekPoint; }; } // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/stages/makeobj.cpp b/src/mongo/db/exec/sbe/stages/makeobj.cpp index ce571bd47d7..dc7f79a7159 100644 --- a/src/mongo/db/exec/sbe/stages/makeobj.cpp +++ b/src/mongo/db/exec/sbe/stages/makeobj.cpp @@ -173,7 +173,7 @@ void MakeObjStageBase<MakeObjOutputType::object>::produceObject() { } if (!projected && !isFieldRestricted(key)) { - auto [tag, val] = bson::convertFrom(true, be, end, sv.size()); + auto [tag, val] = bson::convertFrom<true>(be, end, sv.size()); auto [copyTag, copyVal] = value::copyValue(tag, val); obj->push_back(sv, copyTag, copyVal); --nFieldsNeededIfInclusion; diff --git a/src/mongo/db/exec/sbe/stages/scan.cpp b/src/mongo/db/exec/sbe/stages/scan.cpp index 791fe61e5f8..19080f474f9 100644 --- a/src/mongo/db/exec/sbe/stages/scan.cpp +++ b/src/mongo/db/exec/sbe/stages/scan.cpp @@ -338,7 +338,7 @@ PlanState ScanStage::getNext() { auto sv = bson::fieldNameView(be); if (auto it = _fieldAccessors.find(sv); it != _fieldAccessors.end()) { // Found the field so convert it to Value. - auto [tag, val] = bson::convertFrom(true, be, end, sv.size()); + auto [tag, val] = bson::convertFrom<true>(be, end, sv.size()); if (_oplogTsAccessor && it->first == repl::OpTime::kTimestampFieldName) { auto&& [ownedTag, ownedVal] = value::copyValue(tag, val); @@ -798,7 +798,7 @@ PlanState ParallelScanStage::getNext() { auto sv = bson::fieldNameView(be); if (auto it = _fieldAccessors.find(sv); it != _fieldAccessors.end()) { // Found the field so convert it to Value. - auto [tag, val] = bson::convertFrom(true, be, end, sv.size()); + auto [tag, val] = bson::convertFrom<true>(be, end, sv.size()); it->second->reset(false, tag, val); diff --git a/src/mongo/db/exec/sbe/stages/sort.cpp b/src/mongo/db/exec/sbe/stages/sort.cpp index 75490e0db5b..89d4b8b2822 100644 --- a/src/mongo/db/exec/sbe/stages/sort.cpp +++ b/src/mongo/db/exec/sbe/stages/sort.cpp @@ -125,6 +125,7 @@ void SortStage::makeSorter() { opts.extSortAllowed = _allowDiskUse; opts.limit = _specificStats.limit != std::numeric_limits<size_t>::max() ? _specificStats.limit : 0; + opts.moveSortedDataIntoIterator = true; auto comp = [&](const SorterData& lhs, const SorterData& rhs) { auto size = lhs.first.size(); diff --git a/src/mongo/db/exec/sbe/values/bson.cpp b/src/mongo/db/exec/sbe/values/bson.cpp index 2d5a0cd9bcb..c0dd70eb4d7 100644 --- a/src/mongo/db/exec/sbe/values/bson.cpp +++ b/src/mongo/db/exec/sbe/values/bson.cpp @@ -104,8 +104,8 @@ const char* advance(const char* be, size_t fieldNameSize) { return be; } -std::pair<value::TypeTags, value::Value> convertFrom(bool view, - const char* be, +template <bool View> +std::pair<value::TypeTags, value::Value> convertFrom(const char* be, const char* end, size_t fieldNameSize) { auto type = static_cast<BSONType>(static_cast<signed char>(*be)); @@ -118,14 +118,14 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view, return {value::TypeTags::NumberDouble, value::bitcastFrom<double>(dbl)}; } case BSONType::NumberDecimal: { - if (view) { + if constexpr (View) { return {value::TypeTags::NumberDecimal, value::bitcastFrom<const char*>(be)}; } return value::makeCopyDecimal(value::readDecimal128FromMemory(ConstDataView{be})); } case BSONType::String: { - if (view) { + if constexpr (View) { return {value::TypeTags::bsonString, value::bitcastFrom<const char*>(be)}; } // len includes trailing zero. @@ -146,14 +146,14 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view, } case BSONType::Symbol: { auto value = value::bitcastFrom<const char*>(be); - if (view) { + if constexpr (View) { return {value::TypeTags::bsonSymbol, value}; } return value::makeNewBsonSymbol( value::getStringOrSymbolView(value::TypeTags::bsonSymbol, value)); } case BSONType::BinData: { - if (view) { + if constexpr (View) { return {value::TypeTags::bsonBinData, value::bitcastFrom<const char*>(be)}; } @@ -174,7 +174,7 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view, } } case BSONType::Object: { - if (view) { + if constexpr (View) { return {value::TypeTags::bsonObject, value::bitcastFrom<const char*>(be)}; } // Skip document length. @@ -185,7 +185,7 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view, while (*be != 0) { auto sv = bson::fieldNameView(be); - auto [tag, val] = convertFrom(false, be, end, sv.size()); + auto [tag, val] = convertFrom<false>(be, end, sv.size()); obj->push_back(sv, tag, val); be = advance(be, sv.size()); @@ -193,7 +193,7 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view, return {tag, val}; } case BSONType::Array: { - if (view) { + if constexpr (View) { return {value::TypeTags::bsonArray, value::bitcastFrom<const char*>(be)}; } // Skip array length. @@ -204,7 +204,7 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view, while (*be != 0) { auto sv = bson::fieldNameView(be); - auto [tag, val] = convertFrom(false, be, end, sv.size()); + auto [tag, val] = convertFrom<false>(be, end, sv.size()); arr->push_back(tag, val); be = advance(be, sv.size()); @@ -212,7 +212,7 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view, return {tag, val}; } case BSONType::jstOID: { - if (view) { + if constexpr (View) { return {value::TypeTags::bsonObjectId, value::bitcastFrom<const char*>(be)}; } auto [tag, val] = value::makeNewObjectId(); @@ -247,28 +247,28 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view, return {value::TypeTags::bsonUndefined, 0}; case BSONType::RegEx: { auto value = value::bitcastFrom<const char*>(be); - if (view) { + if constexpr (View) { return {value::TypeTags::bsonRegex, value}; } return value::makeCopyBsonRegex(value::getBsonRegexView(value)); } case BSONType::Code: { auto value = value::bitcastFrom<const char*>(be); - if (view) { + if constexpr (View) { return {value::TypeTags::bsonJavascript, value}; } return value::makeCopyBsonJavascript(value::getBsonJavascriptView(value)); } case BSONType::DBRef: { auto value = value::bitcastFrom<const char*>(be); - if (view) { + if constexpr (View) { return {value::TypeTags::bsonDBPointer, value}; } return value::makeCopyBsonDBPointer(value::getBsonDBPointerView(value)); } case BSONType::CodeWScope: { auto value = value::bitcastFrom<const char*>(be); - if (view) { + if constexpr (View) { return {value::TypeTags::bsonCodeWScope, value}; } return value::makeCopyBsonCodeWScope(value::getBsonCodeWScopeView(value)); @@ -278,6 +278,14 @@ std::pair<value::TypeTags, value::Value> convertFrom(bool view, } } +template std::pair<value::TypeTags, value::Value> convertFrom<false>(const char* be, + const char* end, + size_t fieldNameSize); + +template std::pair<value::TypeTags, value::Value> convertFrom<true>(const char* be, + const char* end, + size_t fieldNameSize); + template <class ArrayBuilder> void convertToBsonObj(ArrayBuilder& builder, value::ArrayEnumerator arr) { for (; !arr.atEnd(); arr.advance()) { diff --git a/src/mongo/db/exec/sbe/values/bson.h b/src/mongo/db/exec/sbe/values/bson.h index b1dfaa74a8a..73e0cd272e8 100644 --- a/src/mongo/db/exec/sbe/values/bson.h +++ b/src/mongo/db/exec/sbe/values/bson.h @@ -35,8 +35,8 @@ namespace mongo { namespace sbe { namespace bson { -std::pair<value::TypeTags, value::Value> convertFrom(bool view, - const char* be, +template <bool View> +std::pair<value::TypeTags, value::Value> convertFrom(const char* be, const char* end, size_t fieldNameSize); diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp index 26c91f01a4f..58da589319b 100644 --- a/src/mongo/db/exec/sbe/values/value.cpp +++ b/src/mongo/db/exec/sbe/values/value.cpp @@ -1137,7 +1137,7 @@ std::pair<TypeTags, Value> ArrayEnumerator::getViewOfValue() const { return {_iter->first, _iter->second}; } else { auto sv = bson::fieldNameView(_arrayCurrent); - return bson::convertFrom(true, _arrayCurrent, _arrayEnd, sv.size()); + return bson::convertFrom<true>(_arrayCurrent, _arrayEnd, sv.size()); } } @@ -1169,7 +1169,7 @@ std::pair<TypeTags, Value> ObjectEnumerator::getViewOfValue() const { return _object->getAt(_index); } else { auto sv = bson::fieldNameView(_objectCurrent); - return bson::convertFrom(true, _objectCurrent, _objectEnd, sv.size()); + return bson::convertFrom<true>(_objectCurrent, _objectEnd, sv.size()); } } diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h index a9b891f5567..f5138e8b9f5 100644 --- a/src/mongo/db/exec/sbe/values/value.h +++ b/src/mongo/db/exec/sbe/values/value.h @@ -33,6 +33,7 @@ #include <absl/container/flat_hash_set.h> #include <array> #include <bitset> +#include <boost/predef/hardware/simd.h> #include <cstdint> #include <ostream> #include <pcre.h> @@ -47,7 +48,9 @@ #include "mongo/db/fts/fts_matcher.h" #include "mongo/db/query/bson_typemask.h" #include "mongo/db/query/collation/collator_interface.h" +#include "mongo/platform/bits.h" #include "mongo/platform/decimal128.h" +#include "mongo/platform/endian.h" #include "mongo/util/assert_util.h" #include "mongo/util/represent_as.h" @@ -693,7 +696,56 @@ inline const char* getRawStringView(TypeTags tag, const Value& val) noexcept { */ inline size_t getStringLength(TypeTags tag, const Value& val) noexcept { if (tag == TypeTags::StringSmall) { - return strlen(reinterpret_cast<const char*>(&val)); + // This path turned out to be very hot in our benchmarks, so we avoid calling 'strlen()' and + // use an alternative approach to compute string length. + // NOTE: Small string value always contains exactly one zero byte, marking the end of the + // string. Bytes after this zero byte can have arbitrary value. +#if defined(BOOST_HW_SIMD_X86_AVAILABLE) && BOOST_HW_SIMD_X86 >= BOOST_HW_SIMD_X86_SSE2_VERSION + // If SSE2 instruction set is available, we use SIMD instructions. There are several steps: + // (1) _mm_cvtsi64_si128(val) - Copy string value into the 128-bit register + // (2) _mm_cmpeq_epi8 - Make each zero byte equal to 0xFF. Other bytes become zero + // (3) _mm_movemask_epi8 - Copy most significant bit of each byte into int + // (4) countTrailingZerosNonZeroInt - Get the position of the first trailing bit set + static_assert(endian::Order::kNative == endian::Order::kLittle); + int ret = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_cvtsi64_si128(val), _mm_setzero_si128())); + return countTrailingZerosNonZero32(ret); +#else + // If SSE2 is not available, we use bit magic. + const uint64_t magic = 0x7F7F7F7F7F7F7F7FULL; + + // This is based on a trick from following link, which describes how to make an expression + // which results in '0' when ALL bytes are non-zero, and results in zero when ANY byte is + // zero. Instead of casting this result to bool, we count how many complete 0 bytes there + // are in 'ret'. This tells us the length of the string. + // https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord + + // At the end of this, ret will store a value where each byte which was all 0's is now + // 10000000 and any byte that was anything non zero is now 0. + + // (1) compute (val & magic). This clears the highest bit in each byte. + uint64_t ret = val & magic; + + // (2) add magic to the above expression. The result is that, from overflow, + // the high bit will be set if any bit was set in 'v' other than the high bit. + ret += magic; + // (3) OR this result with v. This ensures that if the high bit in 'v' was set + // then the high bit in our result will be set. + ret |= val; + // (4) OR with magic. This will set any low bits which were not already set. At this point + // each byte is either all ones or 01111111. + ret |= magic; + + // When we invert this, each byte will either be + // all zeros (previously non zero) or 10000000 (previously 0). + ret = ~ret; + + // So all we have to do is count how many complete 0 bytes there are from one end. + if constexpr (endian::Order::kNative == endian::Order::kLittle) { + return (countTrailingZerosNonZero64(ret) >> 3); + } else { + return (countLeadingZerosNonZero64(ret) >> 3); + } +#endif } else if (tag == TypeTags::StringBig || tag == TypeTags::bsonString) { return ConstDataView(getRawPointerView(val)).read<LittleEndian<int32_t>>() - 1; } diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp index 2ca3465b102..ef924ae8972 100644 --- a/src/mongo/db/exec/sbe/vm/vm.cpp +++ b/src/mongo/db/exec/sbe/vm/vm.cpp @@ -487,7 +487,7 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::getField(value::TypeTa auto sv = bson::fieldNameView(be); if (sv == fieldStr) { - auto [tag, val] = bson::convertFrom(true, be, end, sv.size()); + auto [tag, val] = bson::convertFrom<true>(be, end, sv.size()); return {false, tag, val}; } @@ -844,7 +844,7 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinDropFields(Arit auto sv = bson::fieldNameView(be); if (restrictFieldsSet.count(sv) == 0) { - auto [tag, val] = bson::convertFrom(false, be, end, sv.size()); + auto [tag, val] = bson::convertFrom<false>(be, end, sv.size()); obj->push_back(sv, tag, val); } @@ -2562,7 +2562,7 @@ std::pair<value::TypeTags, value::Value> collComparisonKey(value::TypeTags tag, auto ptr = outputView.objdata(); auto be = ptr + 4; auto end = ptr + ConstDataView(ptr).read<LittleEndian<uint32_t>>(); - return bson::convertFrom(false, be, end, 0); + return bson::convertFrom<false>(be, end, 0); } } // namespace diff --git a/src/mongo/db/query/index_bounds.cpp b/src/mongo/db/query/index_bounds.cpp index fb638d39727..8703368fcbd 100644 --- a/src/mongo/db/query/index_bounds.cpp +++ b/src/mongo/db/query/index_bounds.cpp @@ -491,6 +491,8 @@ IndexBoundsChecker::IndexBoundsChecker(const IndexBounds* bounds, const BSONObj& keyPattern, int scanDirection) : _bounds(bounds), _curInterval(bounds->fields.size(), 0) { + _keyValues.resize(_curInterval.size()); + BSONObjIterator it(keyPattern); while (it.more()) { int indexDirection = it.next().number() >= 0 ? 1 : -1; @@ -588,19 +590,20 @@ IndexBoundsChecker::KeyState IndexBoundsChecker::checkKey(const BSONObj& key, In out->suffixInclusive.resize(_curInterval.size()); // It's useful later to go from a field number to the value for that field. Store these. - // TODO: on optimization pass, populate the vector as-needed and keep the vector around as a - // member variable - vector<BSONElement> keyValues; + size_t i = 0; BSONObjIterator keyIt(key); while (keyIt.more()) { - keyValues.push_back(keyIt.next()); + verify(i < _curInterval.size()); + + _keyValues[i] = keyIt.next(); + i++; } - verify(keyValues.size() == _curInterval.size()); + verify(i == _curInterval.size()); size_t firstNonContainedField; Location orientation; - if (!findLeftmostProblem(keyValues, &firstNonContainedField, &orientation)) { + if (!findLeftmostProblem(_keyValues, &firstNonContainedField, &orientation)) { // All fields in the index are within the current interval. Caller can use the key. return VALID; } @@ -616,7 +619,7 @@ IndexBoundsChecker::KeyState IndexBoundsChecker::checkKey(const BSONObj& key, In // ...and try again. This call modifies 'orientation', so we may check its value again // in the clause below if field number 'firstNonContainedField' isn't in its first // interval. - if (!findLeftmostProblem(keyValues, &firstNonContainedField, &orientation)) { + if (!findLeftmostProblem(_keyValues, &firstNonContainedField, &orientation)) { return VALID; } } @@ -646,7 +649,7 @@ IndexBoundsChecker::KeyState IndexBoundsChecker::checkKey(const BSONObj& key, In // Find the interval that contains our field. size_t newIntervalForField; - Location where = findIntervalForField(keyValues[firstNonContainedField], + Location where = findIntervalForField(_keyValues[firstNonContainedField], _bounds->fields[firstNonContainedField], _expectedDirection[firstNonContainedField], &newIntervalForField); @@ -686,7 +689,7 @@ IndexBoundsChecker::KeyState IndexBoundsChecker::checkKey(const BSONObj& key, In // If all fields to the left have hit the end of their intervals, we can't ask them // to move forward and we should stop iterating. - if (!spaceLeftToAdvance(firstNonContainedField, keyValues)) { + if (!spaceLeftToAdvance(firstNonContainedField, _keyValues)) { return DONE; } diff --git a/src/mongo/db/query/index_bounds.h b/src/mongo/db/query/index_bounds.h index c1964415edd..47b66ffe697 100644 --- a/src/mongo/db/query/index_bounds.h +++ b/src/mongo/db/query/index_bounds.h @@ -299,6 +299,8 @@ private: // Direction of scan * direction of indexing. std::vector<int> _expectedDirection; + + std::vector<BSONElement> _keyValues; }; } // namespace mongo diff --git a/src/mongo/db/query/sbe_stage_builder_filter.cpp b/src/mongo/db/query/sbe_stage_builder_filter.cpp index c22920e6616..0cdccc4323a 100644 --- a/src/mongo/db/query/sbe_stage_builder_filter.cpp +++ b/src/mongo/db/query/sbe_stage_builder_filter.cpp @@ -696,8 +696,8 @@ void generateComparison(MatchExpressionVisitorContext* context, auto makePredicate = [context, expr, binaryOp](sbe::value::SlotId inputSlot, EvalStage inputStage) -> EvalExprStagePair { const auto& rhs = expr->getData(); - auto [tagView, valView] = sbe::bson::convertFrom( - true, rhs.rawdata(), rhs.rawdata() + rhs.size(), rhs.fieldNameSize() - 1); + auto [tagView, valView] = sbe::bson::convertFrom<true>( + rhs.rawdata(), rhs.rawdata() + rhs.size(), rhs.fieldNameSize() - 1); // Most commonly the comparison does not do any kind of type conversions (i.e. 12 > "10" // does not evaluate to true as we do not try to convert a string to a number). Internally, @@ -1439,10 +1439,10 @@ public: auto hasArray = false; auto hasNull = false; for (auto&& equality : equalities) { - auto [tagView, valView] = sbe::bson::convertFrom(true, - equality.rawdata(), - equality.rawdata() + equality.size(), - equality.fieldNameSize() - 1); + auto [tagView, valView] = + sbe::bson::convertFrom<true>(equality.rawdata(), + equality.rawdata() + equality.size(), + equality.fieldNameSize() - 1); hasNull |= tagView == sbe::value::TypeTags::Null; hasArray |= sbe::value::isArray(tagView); diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.cpp b/src/mongo/db/query/sbe_stage_builder_helpers.cpp index 75db3c914fd..c340a31979c 100644 --- a/src/mongo/db/query/sbe_stage_builder_helpers.cpp +++ b/src/mongo/db/query/sbe_stage_builder_helpers.cpp @@ -645,7 +645,7 @@ std::pair<sbe::value::TypeTags, sbe::value::Value> makeValue(const Value& val) { auto obj = bob.done(); auto be = obj.objdata(); auto end = be + obj.objsize(); - return sbe::bson::convertFrom(false, be + 4, end, 0); + return sbe::bson::convertFrom<false>(be + 4, end, 0); } uint32_t dateTypeMask() { diff --git a/src/mongo/db/sorter/sorter.cpp b/src/mongo/db/sorter/sorter.cpp index 4a0aa54156f..330b3b6cc1f 100644 --- a/src/mongo/db/sorter/sorter.cpp +++ b/src/mongo/db/sorter/sorter.cpp @@ -159,6 +159,8 @@ public: template <typename Container> InMemIterator(const Container& input) : _data(input.begin(), input.end()) {} + InMemIterator(std::deque<Data> data) : _data(std::move(data)) {} + void openSource() {} void closeSource() {} @@ -622,6 +624,9 @@ public: if (this->_iters.empty()) { sort(); + if (this->_opts.moveSortedDataIntoIterator) { + return new InMemIterator<Key, Value>(std::move(_data)); + } return new InMemIterator<Key, Value>(_data); } @@ -717,6 +722,9 @@ public: Iterator* done() { if (_haveData) { + if (this->_opts.moveSortedDataIntoIterator) { + return new InMemIterator<Key, Value>(std::move(_best)); + } return new InMemIterator<Key, Value>(_best); } else { return new InMemIterator<Key, Value>(); @@ -823,6 +831,9 @@ public: Iterator* done() { if (this->_iters.empty()) { sort(); + if (this->_opts.moveSortedDataIntoIterator) { + return new InMemIterator<Key, Value>(std::move(_data)); + } return new InMemIterator<Key, Value>(_data); } diff --git a/src/mongo/db/sorter/sorter.h b/src/mongo/db/sorter/sorter.h index 782d0389f57..423638d7a8a 100644 --- a/src/mongo/db/sorter/sorter.h +++ b/src/mongo/db/sorter/sorter.h @@ -114,7 +114,15 @@ struct SortOptions { // extSortAllowed is true. std::string tempDir; - SortOptions() : limit(0), maxMemoryUsageBytes(64 * 1024 * 1024), extSortAllowed(false) {} + // If set to true and sorted data fits into memory, sorted data will be moved into iterator + // instead of copying. + bool moveSortedDataIntoIterator; + + SortOptions() + : limit(0), + maxMemoryUsageBytes(64 * 1024 * 1024), + extSortAllowed(false), + moveSortedDataIntoIterator(false) {} // Fluent API to support expressions like SortOptions().Limit(1000).ExtSortAllowed(true) @@ -142,6 +150,11 @@ struct SortOptions { dbName = std::move(newDbName); return *this; } + + SortOptions& MoveSortedDataIntoIterator(bool newMoveSortedDataIntoIterator = true) { + moveSortedDataIntoIterator = newMoveSortedDataIntoIterator; + return *this; + } }; /** diff --git a/src/mongo/db/storage/key_string.cpp b/src/mongo/db/storage/key_string.cpp index f5cb04f3b20..fd79e6a9ada 100644 --- a/src/mongo/db/storage/key_string.cpp +++ b/src/mongo/db/storage/key_string.cpp @@ -2443,8 +2443,11 @@ Discriminator decodeDiscriminator(const char* buffer, return Discriminator::kInclusive; } -BSONObj toBsonSafe(const char* buffer, size_t len, Ordering ord, const TypeBits& typeBits) { - BSONObjBuilder builder; +void toBsonSafe(const char* buffer, + size_t len, + Ordering ord, + const TypeBits& typeBits, + BSONObjBuilder& builder) { BufReader reader(buffer, len); TypeBits::Reader typeBitsReader(typeBits); for (int i = 0; reader.remaining(); i++) { @@ -2462,6 +2465,11 @@ BSONObj toBsonSafe(const char* buffer, size_t len, Ordering ord, const TypeBits& break; toBsonValue(ctype, &reader, &typeBitsReader, invert, typeBits.version, &(builder << ""), 1); } +} + +BSONObj toBsonSafe(const char* buffer, size_t len, Ordering ord, const TypeBits& typeBits) { + BSONObjBuilder builder; + toBsonSafe(buffer, len, ord, typeBits, builder); return builder.obj(); } diff --git a/src/mongo/db/storage/key_string.h b/src/mongo/db/storage/key_string.h index 57d16141241..68fc1cdeb53 100644 --- a/src/mongo/db/storage/key_string.h +++ b/src/mongo/db/storage/key_string.h @@ -933,6 +933,8 @@ size_t getKeySize(const char* buffer, size_t len, Ordering ord, const TypeBits& BSONObj toBson(StringData data, Ordering ord, const TypeBits& types); BSONObj toBson(const char* buffer, size_t len, Ordering ord, const TypeBits& types) noexcept; BSONObj toBsonSafe(const char* buffer, size_t len, Ordering ord, const TypeBits& types); +void toBsonSafe( + const char* buffer, size_t len, Ordering ord, const TypeBits& types, BSONObjBuilder& builder); Discriminator decodeDiscriminator(const char* buffer, size_t len, Ordering ord, diff --git a/src/mongo/platform/bits.h b/src/mongo/platform/bits.h index b12bda75b3e..606ad9d830e 100644 --- a/src/mongo/platform/bits.h +++ b/src/mongo/platform/bits.h @@ -41,10 +41,24 @@ namespace mongo { inline int countLeadingZeros64(unsigned long long num); /** + * Returns the number of leading 0-bits in num. The result is undefined if num is 0. + */ +inline int countLeadingZerosNonZero64(unsigned long long num); + +/** * Returns the number of trailing 0-bits in num. Returns 64 if num is 0. */ inline int countTrailingZeros64(unsigned long long num); +/** + * Returns the number of trailing 0-bits in num. The result is undefined if num is 0. + */ +inline int countTrailingZerosNonZero64(unsigned long long num); + +/** + * Same as above, but for int. + */ +inline int countTrailingZerosNonZero32(unsigned int num); #if defined(__GNUC__) int countLeadingZeros64(unsigned long long num) { @@ -53,11 +67,24 @@ int countLeadingZeros64(unsigned long long num) { return __builtin_clzll(num); } +int countLeadingZerosNonZero64(unsigned long long num) { + return __builtin_clzll(num); +} + int countTrailingZeros64(unsigned long long num) { if (num == 0) return 64; return __builtin_ctzll(num); } + +int countTrailingZerosNonZero64(unsigned long long num) { + return __builtin_ctzll(num); +} + +int countTrailingZerosNonZero32(unsigned int num) { + return __builtin_ctz(num); +} + #elif defined(_MSC_VER) && defined(_WIN64) int countLeadingZeros64(unsigned long long num) { unsigned long out; @@ -66,12 +93,31 @@ int countLeadingZeros64(unsigned long long num) { return 64; } +int countLeadingZerosNonZero64(unsigned long long num) { + unsigned long out; + _BitScanReverse64(&out, num); + return 63 ^ out; +} + int countTrailingZeros64(unsigned long long num) { unsigned long out; if (_BitScanForward64(&out, num)) return out; return 64; } + +int countTrailingZerosNonZero64(unsigned long long num) { + unsigned long out; + _BitScanForward64(&out, num); + return out; +} + +int countTrailingZerosNonZero32(unsigned int num) { + unsigned long out; + _BitScanForward(&out, static_cast<unsigned long>(num)); + return out; +} + #elif defined(_MSC_VER) && defined(_WIN32) int countLeadingZeros64(unsigned long long num) { unsigned long out; @@ -82,6 +128,14 @@ int countLeadingZeros64(unsigned long long num) { return 64; } +int countLeadingZerosNonZero64(unsigned long long num) { + unsigned long out; + if (_BitScanReverse(&out, static_cast<unsigned long>(num >> 32))) + return 31 ^ out; + _BitScanReverse(&out, static_cast<unsigned long>(num)); + return 63 ^ out; +} + int countTrailingZeros64(unsigned long long num) { unsigned long out; if (_BitScanForward(&out, static_cast<unsigned long>(num))) @@ -90,6 +144,21 @@ int countTrailingZeros64(unsigned long long num) { return out + 32; return 64; } + +int countTrailingZerosNonZero64(unsigned long long num) { + unsigned long out; + if (_BitScanForward(&out, static_cast<unsigned long>(num))) + return out; + + _BitScanForward(&out, static_cast<unsigned long>(num >> 32)); + return out + 32; +} + +int countTrailingZerosNonZero32(unsigned int num) { + unsigned long out; + _BitScanForward(&out, static_cast<unsigned long>(num)); + return out; +} #else #error "No bit-ops definitions for your platform" #endif |