From 952085e37634d5beb992def4d32bb700f41ef03c Mon Sep 17 00:00:00 2001
From: Bobby Morck
Date: Mon, 27 Sep 2021 16:37:10 -0400
Subject: SERVER-59112 Adding support for $mergeObjects in SBE

---
 src/mongo/db/exec/sbe/expressions/expression.cpp |  1 +
 src/mongo/db/exec/sbe/vm/vm.cpp                  | 90 ++++++++++++++++++++++++
 src/mongo/db/exec/sbe/vm/vm.h                    |  2 +
 3 files changed, 93 insertions(+)

(limited to 'src/mongo/db/exec')

diff --git a/src/mongo/db/exec/sbe/expressions/expression.cpp b/src/mongo/db/exec/sbe/expressions/expression.cpp
index 24d559f331c..8e1a8785ec0 100644
--- a/src/mongo/db/exec/sbe/expressions/expression.cpp
+++ b/src/mongo/db/exec/sbe/expressions/expression.cpp
@@ -403,6 +403,7 @@ static stdx::unordered_map kBuiltinFunctions = {
     {"log10", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::log10, false}},
     {"sqrt", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::sqrt, false}},
     {"addToArray", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::addToArray, true}},
+    {"mergeObjects", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::mergeObjects, true}},
     {"addToSet", BuiltinFn{[](size_t n) { return n == 1; }, vm::Builtin::addToSet, true}},
     {"collAddToSet", BuiltinFn{[](size_t n) { return n == 2; }, vm::Builtin::collAddToSet, true}},
     {"doubleDoubleSum",
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index 546baa7a3d6..8010c9b57dd 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -1623,6 +1623,94 @@ std::tuple ByteCode::builtinAddToArray(Arit
     return {ownAgg, tagAgg, valAgg};
 }
 
+// TODO SERVER-60289: A new accumulator is allocated whenever a new value needs to be
+// accumulated. As the accumulated value is an Object type, this may cause performance
+// issues. Possible solutions to this could include extending the interface of Object or creating
+// a new type that better supports the mergeObjects operation.
+//
+// Aggregate function that merges the object at stack slot 1 into the accumulator at stack
+// slot 0 and returns the merged accumulator as an owned Object:
+//  - a Nothing accumulator is treated as an empty object;
+//  - a Nothing or Null input returns the accumulator unchanged;
+//  - otherwise accumulator fields keep their position, taking the input's value when the input
+//    has a field of the same name, and the remaining input fields are appended in input order.
+// If the input repeats a field name, its last value is used and the field is emitted once.
+// NOTE(review): any other input is passed straight to ObjectEnumerator; presumably callers only
+// supply objects here -- confirm.
+std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinMergeObjects(ArityType arity) {
+    auto [ownAgg, tagAgg, valAgg] = getFromStack(0);
+    auto [_, tagField, valField] = getFromStack(1);
+
+    // Create a new object if it does not exist yet.
+    if (tagAgg == value::TypeTags::Nothing) {
+        ownAgg = true;
+        std::tie(tagAgg, valAgg) = value::makeNewObject();
+    } else {
+        // Take ownership of the accumulator.
+        topStack(false, value::TypeTags::Nothing, 0);
+    }
+    value::ValueGuard guard{tagAgg, valAgg};
+
+    invariant(ownAgg && tagAgg == value::TypeTags::Object);
+
+    // Nothing to merge: reset the guard and hand the accumulator back as is.
+    if (tagField == value::TypeTags::Nothing || tagField == value::TypeTags::Null) {
+        guard.reset();
+        return {ownAgg, tagAgg, valAgg};
+    }
+
+    // Index the fields of the current object by name, storing what getViewOfValue() returns.
+    // A repeated field name overwrites the earlier entry, so the last value wins.
+    StringMap<std::pair<value::TypeTags, value::Value>> currObjMap;
+    for (auto currObjEnumerator = value::ObjectEnumerator{tagField, valField};
+         !currObjEnumerator.atEnd();
+         currObjEnumerator.advance()) {
+        currObjMap[currObjEnumerator.getFieldName()] = currObjEnumerator.getViewOfValue();
+    }
+
+    // Process the accumulated fields and copy them over to the new accumulator: a field that
+    // does not exist in the current object keeps its accumulated value, otherwise the value is
+    // taken from the current object. Preserves the order of existing fields in the accumulator.
+    auto [newTagAgg, newValAgg] = value::makeNewObject();
+    value::ValueGuard newGuard{newTagAgg, newValAgg};
+    auto newObj = value::getObjectView(newValAgg);
+    for (auto aggObjEnumerator = value::ObjectEnumerator{tagAgg, valAgg}; !aggObjEnumerator.atEnd();
+         aggObjEnumerator.advance()) {
+        auto it = currObjMap.find(aggObjEnumerator.getFieldName());
+        if (it == currObjMap.end()) {
+            auto [aggObjTag, aggObjVal] = aggObjEnumerator.getViewOfValue();
+            auto [aggObjTagCopy, aggObjValCopy] = value::copyValue(aggObjTag, aggObjVal);
+            newObj->push_back(aggObjEnumerator.getFieldName(), aggObjTagCopy, aggObjValCopy);
+        } else {
+            auto [currObjTag, currObjVal] = it->second;
+            auto [currObjTagCopy, currObjValCopy] = value::copyValue(currObjTag, currObjVal);
+            newObj->push_back(aggObjEnumerator.getFieldName(), currObjTagCopy, currObjValCopy);
+            // Consumed: keeps the append pass below from adding this field again.
+            currObjMap.erase(it);
+        }
+    }
+
+    // Copy the remaining fields of the current object being processed to the new
+    // accumulator. Fields that were already present in the accumulated fields
+    // have been copied over already. Preserves the relative order of the new fields.
+    for (auto currObjEnumerator = value::ObjectEnumerator{tagField, valField};
+         !currObjEnumerator.atEnd();
+         currObjEnumerator.advance()) {
+        auto it = currObjMap.find(currObjEnumerator.getFieldName());
+        if (it != currObjMap.end()) {
+            auto [currObjTag, currObjVal] = it->second;
+            auto [currObjTagCopy, currObjValCopy] = value::copyValue(currObjTag, currObjVal);
+            newObj->push_back(currObjEnumerator.getFieldName(), currObjTagCopy, currObjValCopy);
+            // Erase the entry so a field name repeated in the current object is appended once.
+            currObjMap.erase(it);
+        }
+    }
+
+    // 'guard' was not reset on this path, so it still holds the previous accumulator.
+    newGuard.reset();
+    return {ownAgg, newTagAgg, newValAgg};
+}
+
 std::tuple<bool, value::TypeTags, value::Value> ByteCode::builtinAddToSet(ArityType arity) {
     auto [ownAgg, tagAgg, valAgg] = getFromStack(0);
     auto [_, tagField, valField] = getFromStack(1);
@@ -3693,6 +3781,8 @@ std::tuple ByteCode::dispatchBuiltin(Builti
             return builtinSqrt(arity);
         case Builtin::addToArray:
             return builtinAddToArray(arity);
+        case Builtin::mergeObjects:
+            return builtinMergeObjects(arity);
         case Builtin::addToSet:
             return builtinAddToSet(arity);
         case Builtin::collAddToSet:
diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h
index 464caaab8ff..1174ff901f8 100644
--- a/src/mongo/db/exec/sbe/vm/vm.h
+++ b/src/mongo/db/exec/sbe/vm/vm.h
@@ -340,6 +340,7 @@ enum class Builtin : uint8_t {
     log10,
     sqrt,
     addToArray,       // agg function to append to an array
+    mergeObjects,     // agg function to merge BSON documents
     addToSet,         // agg function to append to a set
     collAddToSet,     // agg function to append to a set (with collation)
     doubleDoubleSum,  // special double summation
@@ -902,6 +903,7 @@ private:
     std::tuple<bool, value::TypeTags, value::Value> builtinLog10(ArityType arity);
     std::tuple<bool, value::TypeTags, value::Value> builtinSqrt(ArityType arity);
     std::tuple<bool, value::TypeTags, value::Value> builtinAddToArray(ArityType arity);
+    std::tuple<bool, value::TypeTags, value::Value> builtinMergeObjects(ArityType arity);
     std::tuple<bool, value::TypeTags, value::Value> builtinAddToSet(ArityType arity);
     std::tuple<bool, value::TypeTags, value::Value> builtinCollAddToSet(ArityType arity);
     std::tuple<bool, value::TypeTags, value::Value> builtinDoubleDoubleSum(ArityType arity);
-- 
cgit v1.2.1