diff options
author | Mohammad Dashti <mdashti@gmail.com> | 2021-07-14 06:20:55 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-07-14 06:47:23 +0000 |
commit | 5de13812ae182fae098fecd5f3a6874fa6b0c749 (patch) | |
tree | c53ccd852baed0a195c25e2eac268093033106f3 /src/mongo | |
parent | e6ee438de6915c717986bb48bf9a857537ee6bb1 (diff) | |
download | mongo-5de13812ae182fae098fecd5f3a6874fa6b0c749.tar.gz |
SERVER-54078 [SBE] Exponential Vector Expansion (Instead of Linear Expansion) in SBE Arrays and Objects.
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/exec/sbe/values/slot.cpp | 32 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/value.h | 20 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/vm/vm.cpp | 104 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_filter.cpp | 57 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_index_scan.cpp | 2 |
6 files changed, 142 insertions, 82 deletions
diff --git a/src/mongo/db/exec/sbe/values/slot.cpp b/src/mongo/db/exec/sbe/values/slot.cpp index 5f17680fe6c..e0abb16dbec 100644 --- a/src/mongo/db/exec/sbe/values/slot.cpp +++ b/src/mongo/db/exec/sbe/values/slot.cpp @@ -96,10 +96,12 @@ static std::pair<TypeTags, Value> deserializeValue(BufReader& buf) { auto cnt = buf.read<LittleEndian<size_t>>(); auto [arrTag, arrVal] = makeNewArray(); auto arr = getArrayView(arrVal); - arr->reserve(cnt); - for (size_t idx = 0; idx < cnt; ++idx) { - auto [tag, val] = deserializeValue(buf); - arr->push_back(tag, val); + if (cnt) { + arr->reserve(cnt); + for (size_t idx = 0; idx < cnt; ++idx) { + auto [tag, val] = deserializeValue(buf); + arr->push_back(tag, val); + } } tag = arrTag; val = arrVal; @@ -109,10 +111,12 @@ static std::pair<TypeTags, Value> deserializeValue(BufReader& buf) { auto cnt = buf.read<LittleEndian<size_t>>(); auto [arrTag, arrVal] = makeNewArraySet(); auto arr = getArraySetView(arrVal); - arr->reserve(cnt); - for (size_t idx = 0; idx < cnt; ++idx) { - auto [tag, val] = deserializeValue(buf); - arr->push_back(tag, val); + if (cnt) { + arr->reserve(cnt); + for (size_t idx = 0; idx < cnt; ++idx) { + auto [tag, val] = deserializeValue(buf); + arr->push_back(tag, val); + } } tag = arrTag; val = arrVal; @@ -122,11 +126,13 @@ static std::pair<TypeTags, Value> deserializeValue(BufReader& buf) { auto cnt = buf.read<LittleEndian<size_t>>(); auto [objTag, objVal] = makeNewObject(); auto obj = getObjectView(objVal); - obj->reserve(cnt); - for (size_t idx = 0; idx < cnt; ++idx) { - auto fieldName = buf.readCStr(); - auto [tag, val] = deserializeValue(buf); - obj->push_back({fieldName.rawData(), fieldName.size()}, tag, val); + if (cnt) { + obj->reserve(cnt); + for (size_t idx = 0; idx < cnt; ++idx) { + auto fieldName = buf.readCStr(); + auto [tag, val] = deserializeValue(buf); + obj->push_back(fieldName, tag, val); + } } tag = objTag; val = objVal; diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h index f2cdf16d263..bc01d193f90 100644 --- a/src/mongo/db/exec/sbe/values/value.h +++ b/src/mongo/db/exec/sbe/values/value.h @@ -443,9 +443,15 @@ public: ValueGuard guard{tag, val}; // Reserve space in all vectors, they are the same size. We arbitrarily picked _typeTags // to determine the size. - reserve(_typeTags.size() + 1); + if (_typeTags.capacity() == _typeTags.size()) { + // Reserve double capacity. + // Note: we are not concerned about the overflow in the operation below, as the size + // of 'Value' is 8 bytes. Consequently, the maximum capacity ever is 2^64/8 = 2^61. + // We can freely shift 2^61 << 1 without any overflow. + // Note: the case of '_typeTags.capacity() == 1' is handled inside 'reserve' itself. + reserve(_typeTags.capacity() << 1); + } _names.emplace_back(std::string(name)); - _typeTags.push_back(tag); _values.push_back(val); @@ -520,8 +526,14 @@ public: ValueGuard guard{tag, val}; // Reserve space in all vectors, they are the same size. We arbitrarily picked _typeTags // to determine the size. - reserve(_typeTags.size() + 1); - + if (_typeTags.capacity() == _typeTags.size()) { + // Reserve double capacity. + // Note: we are not concerned about the overflow in the operation below, as the size + // of 'Value' is 8 bytes. Consequently, the maximum capacity ever is 2^64/8 = 2^61. + // We can freely shift 2^61 << 1 without any overflow. + // Note: the case of '_typeTags.capacity() == 1' is handled inside 'reserve' itself. + reserve(_typeTags.capacity() << 1); + } _typeTags.push_back(tag); _values.push_back(val); diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp index 6e46d915534..921c9ec7fa8 100644 --- a/src/mongo/db/exec/sbe/vm/vm.cpp +++ b/src/mongo/db/exec/sbe/vm/vm.cpp @@ -875,10 +875,13 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinNewArray(ArityT auto arr = value::getArrayView(val); - for (ArityType idx = 0; idx < arity; ++idx) { - auto [owned, tag, val] = getFromStack(idx); - auto [tagCopy, valCopy] = value::copyValue(tag, val); - arr->push_back(tagCopy, valCopy); + if (arity) { + arr->reserve(arity); + for (ArityType idx = 0; idx < arity; ++idx) { + auto [owned, tag, val] = getFromStack(idx); + auto [tagCopy, valCopy] = value::copyValue(tag, val); + arr->push_back(tagCopy, valCopy); + } } guard.reset(); @@ -914,12 +917,18 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinNewArrayFromRan auto isPositiveStep = stepVal > 0; if (isPositiveStep) { - for (auto i = startVal; i < endVal; i += stepVal) { - arr->push_back(value::TypeTags::NumberInt32, value::bitcastTo<int32_t>(i)); + if (startVal < endVal) { + arr->reserve(1 + (endVal - startVal) / stepVal); + for (auto i = startVal; i < endVal; i += stepVal) { + arr->push_back(value::TypeTags::NumberInt32, value::bitcastTo<int32_t>(i)); + } } } else { - for (auto i = startVal; i > endVal; i += stepVal) { - arr->push_back(value::TypeTags::NumberInt32, value::bitcastTo<int32_t>(i)); + if (startVal > endVal) { + arr->reserve(1 + (startVal - endVal) / (-stepVal)); + for (auto i = startVal; i > endVal; i += stepVal) { + arr->push_back(value::TypeTags::NumberInt32, value::bitcastTo<int32_t>(i)); + } } } @@ -932,6 +941,11 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinNewObj(ArityTyp std::vector<value::Value> values; std::vector<std::string> names; + size_t tmpVectorLen = arity >> 1; + typeTags.reserve(tmpVectorLen); + values.reserve(tmpVectorLen); + names.reserve(tmpVectorLen); + for (ArityType idx = 0; idx < arity; idx += 2) { { auto [owned, tag, val] = getFromStack(idx); @@ -953,9 +967,12 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinNewObj(ArityTyp auto obj = value::getObjectView(val); value::ValueGuard guard{tag, val}; - for (size_t idx = 0; idx < typeTags.size(); ++idx) { - auto [tagCopy, valCopy] = value::copyValue(typeTags[idx], values[idx]); - obj->push_back(names[idx], tagCopy, valCopy); + if (typeTags.size()) { + obj->reserve(typeTags.size()); + for (size_t idx = 0; idx < typeTags.size(); ++idx) { + auto [tagCopy, valCopy] = value::copyValue(typeTags[idx], values[idx]); + obj->push_back(names[idx], tagCopy, valCopy); + } } guard.reset(); @@ -1544,6 +1561,7 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinDateToParts(Ari auto [dateObjTag, dateObjVal] = value::makeNewObject(); value::ValueGuard guard{dateObjTag, dateObjVal}; auto dateObj = value::getObjectView(dateObjVal); + dateObj->reserve(7); dateObj->push_back("year", value::TypeTags::NumberInt32, dateParts.year); dateObj->push_back("month", value::TypeTags::NumberInt32, dateParts.month); dateObj->push_back("day", value::TypeTags::NumberInt32, dateParts.dayOfMonth); @@ -1582,6 +1600,7 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinIsoDateToParts( auto [dateObjTag, dateObjVal] = value::makeNewObject(); value::ValueGuard guard{dateObjTag, dateObjVal}; auto dateObj = value::getObjectView(dateObjVal); + dateObj->reserve(7); dateObj->push_back("isoWeekYear", value::TypeTags::NumberInt32, dateParts.year); dateObj->push_back("isoWeek", value::TypeTags::NumberInt32, dateParts.weekOfYear); dateObj->push_back("isoDayOfWeek", value::TypeTags::NumberInt32, dateParts.dayOfWeek); @@ -2459,19 +2478,22 @@ std::tuple<bool, value::TypeTags, value::Value> buildRegexMatchResultObject( // hold the (start, limit) pairs of indexes, for each of the capture groups. We skip the first // two elements and start iteration from 3rd element so that we only construct the strings for // capture groups. - for (size_t i = 0; i < numCaptures; ++i) { - const auto start = capturesBuffer[2 * (i + 1)]; - const auto limit = capturesBuffer[2 * (i + 1) + 1]; - if (!verifyBounds(start, limit, true)) { - return {false, value::TypeTags::Nothing, 0}; - } + if (numCaptures) { + arrayView->reserve(numCaptures); + for (size_t i = 0; i < numCaptures; ++i) { + const auto start = capturesBuffer[2 * (i + 1)]; + const auto limit = capturesBuffer[2 * (i + 1) + 1]; + if (!verifyBounds(start, limit, true)) { + return {false, value::TypeTags::Nothing, 0}; + } - if (start == -1 && limit == -1) { - arrayView->push_back(value::TypeTags::Null, 0); - } else { - auto captureString = inputString.substr(start, limit - start); - auto [tag, val] = value::makeNewString(captureString); - arrayView->push_back(tag, val); + if (start == -1 && limit == -1) { + arrayView->push_back(value::TypeTags::Null, 0); + } else { + auto captureString = inputString.substr(start, limit - start); + auto [tag, val] = value::makeNewString(captureString); + arrayView->push_back(tag, val); + } } } @@ -2822,11 +2844,14 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinExtractSubArray }(); size_t end = convertedStart + std::min(length, arraySize - convertedStart); + if (convertedStart < end) { + resultView->reserve(end - convertedStart); - for (size_t i = convertedStart; i < end; i++) { - auto [tag, value] = arrayView->getAt(i); - auto [copyTag, copyValue] = value::copyValue(tag, value); - resultView->push_back(copyTag, copyValue); + for (size_t i = convertedStart; i < end; i++) { + auto [tag, value] = arrayView->getAt(i); + auto [copyTag, copyValue] = value::copyValue(tag, value); + resultView->push_back(copyTag, copyValue); + } } } else { auto advance = [](value::ArrayEnumerator& enumerator, size_t offset) { @@ -2972,11 +2997,13 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinReverseArray(Ar if (inputType == value::TypeTags::Array) { auto inputView = value::getArrayView(inputVal); size_t inputSize = inputView->size(); - resultView->reserve(inputSize); - for (size_t i = 0; i < inputSize; i++) { - auto [origTag, origVal] = inputView->getAt(inputSize - 1 - i); - auto [copyTag, copyVal] = copyValue(origTag, origVal); - resultView->push_back(copyTag, copyVal); + if (inputSize) { + resultView->reserve(inputSize); + for (size_t i = 0; i < inputSize; i++) { + auto [origTag, origVal] = inputView->getAt(inputSize - 1 - i); + auto [copyTag, copyVal] = copyValue(origTag, origVal); + resultView->push_back(copyTag, copyVal); + } } resultGuard.reset(); @@ -2992,7 +3019,6 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinReverseArray(Ar // Reserve space to avoid resizing on push_back calls. auto arraySetView = value::getArraySetView(inputVal); inputContents.reserve(arraySetView->size()); - resultView->reserve(arraySetView->size()); } while (!enumerator.atEnd()) { @@ -3000,10 +3026,14 @@ std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinReverseArray(Ar enumerator.advance(); } - // Run through the array backwards and copy into the result array. - for (auto it = inputContents.rbegin(); it != inputContents.rend(); ++it) { - auto [copyTag, copyVal] = copyValue(it->first, it->second); - resultView->push_back(copyTag, copyVal); + if (inputContents.size()) { + resultView->reserve(inputContents.size()); + + // Run through the array backwards and copy into the result array. + for (auto it = inputContents.rbegin(); it != inputContents.rend(); ++it) { + auto [copyTag, copyVal] = copyValue(it->first, it->second); + resultView->push_back(copyTag, copyVal); + } } resultGuard.reset(); diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index f2f53662346..2a36d846e47 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -725,9 +725,12 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder sbe::value::ValueGuard inputGuard{inputTag, inputVal}; auto inputView = sbe::value::getArrayView(inputVal); - for (auto& doc : vsn->docs) { - auto [tag, val] = makeValue(doc); - inputView->push_back(tag, val); + if (vsn->docs.size()) { + inputView->reserve(vsn->docs.size()); + for (auto& doc : vsn->docs) { + auto [tag, val] = makeValue(doc); + inputView->push_back(tag, val); + } } inputGuard.reset(); diff --git a/src/mongo/db/query/sbe_stage_builder_filter.cpp b/src/mongo/db/query/sbe_stage_builder_filter.cpp index cedb52a3f5b..f58ee002164 100644 --- a/src/mongo/db/query/sbe_stage_builder_filter.cpp +++ b/src/mongo/db/query/sbe_stage_builder_filter.cpp @@ -848,14 +848,16 @@ void generateBitTest(MatchExpressionVisitorContext* context, // checking if an item has already been seen. auto [bitPosTag, bitPosVal] = sbe::value::makeNewArray(); auto arr = sbe::value::getArrayView(bitPosVal); - arr->reserve(bitPositions.size()); - - std::set<uint32_t> seenBits; - for (size_t index = 0; index < bitPositions.size(); ++index) { - auto currentBit = bitPositions[index]; - if (auto result = seenBits.insert(currentBit); result.second) { - arr->push_back(sbe::value::TypeTags::NumberInt64, - sbe::value::bitcastFrom<int64_t>(currentBit)); + if (bitPositions.size()) { + arr->reserve(bitPositions.size()); + + std::set<uint32_t> seenBits; + for (size_t index = 0; index < bitPositions.size(); ++index) { + auto currentBit = bitPositions[index]; + if (auto result = seenBits.insert(currentBit); result.second) { + arr->push_back(sbe::value::TypeTags::NumberInt64, + sbe::value::bitcastFrom<int64_t>(currentBit)); + } } } @@ -1441,18 +1443,21 @@ public: auto hasArray = false; auto hasNull = false; - for (auto&& equality : equalities) { - auto [tagView, valView] = - sbe::bson::convertFrom<true>(equality.rawdata(), - equality.rawdata() + equality.size(), - equality.fieldNameSize() - 1); - - hasNull |= tagView == sbe::value::TypeTags::Null; - hasArray |= sbe::value::isArray(tagView); - - // An ArraySet assumes ownership of it's values so we have to make a copy here. - auto [tag, val] = sbe::value::copyValue(tagView, valView); - arrSet->push_back(tag, val); + if (equalities.size()) { + arrSet->reserve(equalities.size()); + for (auto&& equality : equalities) { + auto [tagView, valView] = + sbe::bson::convertFrom<true>(equality.rawdata(), + equality.rawdata() + equality.size(), + equality.fieldNameSize() - 1); + + hasNull |= tagView == sbe::value::TypeTags::Null; + hasArray |= sbe::value::isArray(tagView); + + // An ArraySet assumes ownership of it's values so we have to make a copy here. + auto [tag, val] = sbe::value::copyValue(tagView, valView); + arrSet->push_back(tag, val); + } } const auto traversalMode = hasArray ? LeafTraversalMode::kArrayAndItsElements @@ -1495,12 +1500,14 @@ public: auto arr = sbe::value::getArrayView(arrVal); - arr->reserve(regexes.size()); + if (regexes.size()) { + arr->reserve(regexes.size()); - for (auto&& r : regexes) { - auto [regexTag, regexVal] = - sbe::value::makeNewPcreRegex(r->getString(), r->getFlags()); - arr->push_back(regexTag, regexVal); + for (auto&& r : regexes) { + auto [regexTag, regexVal] = + sbe::value::makeNewPcreRegex(r->getString(), r->getFlags()); + arr->push_back(regexTag, regexVal); + } } auto makePredicate = diff --git a/src/mongo/db/query/sbe_stage_builder_index_scan.cpp b/src/mongo/db/query/sbe_stage_builder_index_scan.cpp index 9eeeeab6b30..3727ea54294 100644 --- a/src/mongo/db/query/sbe_stage_builder_index_scan.cpp +++ b/src/mongo/db/query/sbe_stage_builder_index_scan.cpp @@ -304,9 +304,11 @@ generateOptimizedMultiIntervalIndexScan( // {l: KS(...), h: KS(...)}, ... ] auto [boundsTag, boundsVal] = sbe::value::makeNewArray(); auto arr = sbe::value::getArrayView(boundsVal); + arr->reserve(intervals.size()); for (auto&& [lowKey, highKey] : intervals) { auto [tag, val] = sbe::value::makeNewObject(); auto obj = sbe::value::getObjectView(val); + obj->reserve(2); obj->push_back("l"_sd, sbe::value::TypeTags::ksValue, sbe::value::bitcastFrom<KeyString::Value*>(lowKey.release())); |