diff options
author | Drew Paroski <drew.paroski@mongodb.com> | 2023-04-18 03:26:55 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-27 21:24:03 +0000 |
commit | 0145677e42d72b24910401f06da986c767738914 (patch) | |
tree | bf11371090b38f0ea99a648d1b180197a2dc4f8b /src/mongo | |
parent | 07b38e091b48acd305469d525b81aebf3aeadbf1 (diff) | |
download | mongo-0145677e42d72b24910401f06da986c767738914.tar.gz |
SERVER-76510 Make makeBsonObj() preserve field order
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/exec/sbe/makeobj_spec.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/makeobj_spec.h | 24 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/vm/vm.cpp | 183 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/vm/vm.h | 27 |
4 files changed, 177 insertions, 59 deletions
diff --git a/src/mongo/db/exec/sbe/makeobj_spec.cpp b/src/mongo/db/exec/sbe/makeobj_spec.cpp index e23faac3dcf..f6b037b5623 100644 --- a/src/mongo/db/exec/sbe/makeobj_spec.cpp +++ b/src/mongo/db/exec/sbe/makeobj_spec.cpp @@ -42,7 +42,7 @@ IndexedStringVector MakeObjSpec::buildIndexedFieldVector(std::vector<std::string size_t MakeObjSpec::getApproximateSize() const { auto size = sizeof(MakeObjSpec); - size += size_estimator::estimate(fieldsAndProjects); + size += size_estimator::estimate(fieldNames); return size; } } // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/makeobj_spec.h b/src/mongo/db/exec/sbe/makeobj_spec.h index 2c488fa176b..b335b630a7f 100644 --- a/src/mongo/db/exec/sbe/makeobj_spec.h +++ b/src/mongo/db/exec/sbe/makeobj_spec.h @@ -50,34 +50,34 @@ struct MakeObjSpec { std::vector<std::string> fields, std::vector<std::string> projects) : fieldBehavior(fieldBehavior), - numFields(fields.size()), - fieldsAndProjects(buildIndexedFieldVector(std::move(fields), std::move(projects))) {} + numKeepOrDrops(fields.size()), + fieldNames(buildIndexedFieldVector(std::move(fields), std::move(projects))) {} MakeObjSpec(const MakeObjSpec& other) : fieldBehavior(other.fieldBehavior), - numFields(other.numFields), - fieldsAndProjects(other.fieldsAndProjects) {} + numKeepOrDrops(other.numKeepOrDrops), + fieldNames(other.fieldNames) {} MakeObjSpec(MakeObjSpec&& other) : fieldBehavior(other.fieldBehavior), - numFields(other.numFields), - fieldsAndProjects(std::move(other.fieldsAndProjects)) {} + numKeepOrDrops(other.numKeepOrDrops), + fieldNames(std::move(other.fieldNames)) {} std::string toString() const { StringBuilder builder; builder << (fieldBehavior == MakeObjSpec::FieldBehavior::keep ? "keep" : "drop") << ", ["; - for (size_t i = 0; i < fieldsAndProjects.size(); ++i) { - if (i == numFields) { + for (size_t i = 0; i < fieldNames.size(); ++i) { + if (i == numKeepOrDrops) { builder << "], ["; } else if (i != 0) { builder << ", "; } - builder << '"' << fieldsAndProjects[i] << '"'; + builder << '"' << fieldNames[i] << '"'; } - if (fieldsAndProjects.size() == numFields) { + if (fieldNames.size() == numKeepOrDrops) { builder << "], ["; } @@ -89,7 +89,7 @@ struct MakeObjSpec { size_t getApproximateSize() const; FieldBehavior fieldBehavior; - size_t numFields = 0; - IndexedStringVector fieldsAndProjects; + size_t numKeepOrDrops = 0; + IndexedStringVector fieldNames; }; } // namespace mongo::sbe diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp index 4e533214f97..97276df8c71 100644 --- a/src/mongo/db/exec/sbe/vm/vm.cpp +++ b/src/mongo/db/exec/sbe/vm/vm.cpp @@ -5315,88 +5315,181 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinSortKeyComponent } } -std::pair<value::TypeTags, value::Value> ByteCode::produceBsonObject(const MakeObjSpec* mos, +std::pair<value::TypeTags, value::Value> ByteCode::produceBsonObject(const MakeObjSpec* spec, value::TypeTags rootTag, value::Value rootVal, - const size_t startIdx) { - auto& fieldBehavior = mos->fieldBehavior; - auto& fieldsAndProjects = mos->fieldsAndProjects; - auto numFields = mos->numFields; + int stackOffset) { + auto& fieldNames = spec->fieldNames; + + const bool isInclusion = spec->fieldBehavior == MakeObjSpec::FieldBehavior::keep; + const size_t numFields = fieldNames.size(); + const size_t numKeepOrDrops = spec->numKeepOrDrops; + const size_t numComputedFields = numFields - numKeepOrDrops; + + // The "visited" array keeps track of which computed fields have been visited so far so that + // later we can append the non-visited computed fields to the end of the object. + char* visited = nullptr; + char localVisitedArr[64]; + std::unique_ptr<char[]> allocatedVisitedArr; + if (MONGO_unlikely(numComputedFields > 64)) { + allocatedVisitedArr = std::make_unique<char[]>(numComputedFields); + visited = allocatedVisitedArr.get(); + } else { + visited = &localVisitedArr[0]; + } - const bool isInclusion = fieldBehavior == MakeObjSpec::FieldBehavior::keep; + memset(visited, 0, numComputedFields); UniqueBSONObjBuilder bob; - if (value::isObject(rootTag)) { - size_t nFieldsIfInclusion = numFields; + size_t numFieldsRemaining = numFields; + size_t numComputedFieldsRemaining = numComputedFields; + + const size_t numFieldsRemainingThreshold = isInclusion ? 1 : 0; + if (value::isObject(rootTag)) { if (rootTag == value::TypeTags::bsonObject) { - if (!(nFieldsIfInclusion == 0 && isInclusion)) { - auto be = value::bitcastTo<const char*>(rootVal); - const auto end = be + ConstDataView(be).read<LittleEndian<uint32_t>>(); + auto be = value::bitcastTo<const char*>(rootVal); + const auto end = be + ConstDataView(be).read<LittleEndian<uint32_t>>(); - // Skip document length. - be += 4; + // Skip document length. + be += 4; + + // Let N = the # of "computed" fields, and let K = (isInclusion && N > 0 ? N-1 : N). + // + // When we have seen all of the "keepOrDrop" fields and when we have seen K of the + // "computed" fields, we can break out of this loop, ignore or copy the remaining + // fields from 'rootVal' (depending on whether 'isInclusion' is true or false), and + // then finally append the remaining computed field (if there is one) to the output + // object. + // + // (When isInclusion == true and a single "computed" field remains, it's okay to stop + // scanning 'rootVal' and append the remaining computed field to the end of the output + // object because it will have no observable effect on field order.) + if (numFieldsRemaining > numFieldsRemainingThreshold || + numFieldsRemaining != numComputedFieldsRemaining) { while (be != end - 1) { auto sv = bson::fieldNameAndLength(be); auto nextBe = bson::advance(be, sv.size()); + size_t pos = fieldNames.findPos(sv); - bool isProjectedOrRestricted; - size_t pos = fieldsAndProjects.findPos(sv); if (pos == IndexedStringVector::npos) { - isProjectedOrRestricted = isInclusion; - } else if (pos < numFields) { - isProjectedOrRestricted = !isInclusion; + if (!isInclusion) { + bob.append(BSONElement(be, sv.size() + 1, nextBe - be)); + } + } else if (pos < numKeepOrDrops) { + --numFieldsRemaining; + + if (isInclusion) { + bob.append(BSONElement(be, sv.size() + 1, nextBe - be)); + } } else { - isProjectedOrRestricted = true; - } + --numFieldsRemaining; + --numComputedFieldsRemaining; + + auto projectIdx = pos - numKeepOrDrops; + visited[projectIdx] = 1; - if (!isProjectedOrRestricted) { - bob.append(BSONElement(be, sv.size() + 1, nextBe - be)); - --nFieldsIfInclusion; + size_t argIdx = stackOffset + projectIdx; + auto [_, tag, val] = getFromStack(argIdx); + bson::appendValueToBsonObj(bob, fieldNames[pos], tag, val); } - if (nFieldsIfInclusion == 0 && isInclusion) { + be = nextBe; + + if (numFieldsRemaining <= numFieldsRemainingThreshold && + numFieldsRemaining == numComputedFieldsRemaining) { break; } + } + } + + // If isInclusion == false and 'be' has not reached the end of 'rootVal', copy over + // the remaining fields from 'rootVal' to the output object. + if (!isInclusion) { + while (be != end - 1) { + auto sv = bson::fieldNameAndLength(be); + auto nextBe = bson::advance(be, sv.size()); + bob.append(BSONElement(be, sv.size() + 1, nextBe - be)); be = nextBe; } } } else if (rootTag == value::TypeTags::Object) { - if (!(nFieldsIfInclusion == 0 && isInclusion)) { - auto objRoot = value::getObjectView(rootVal); - for (size_t idx = 0; idx < objRoot->size(); ++idx) { + auto objRoot = value::getObjectView(rootVal); + size_t idx = 0; + + // Let N = number of "computed" fields, and let K = (isInclusion && N > 0 ? N-1 : N). + // + // When we have seen all of the "keepOrDrop" fields and when we have seen K of the + // "computed" fields, we can break out of this loop, ignore or copy the remaining + // fields from 'rootVal' (depending on whether 'isInclusion' is true or false), and + // then finally append the remaining computed field (if there is one) to the output + // object. + // + // (When isInclusion == true and a single "computed" field remains, it's okay to stop + // scanning 'rootVal' and append the remaining computed field to the end of the output + // object because it will have no observable effect on field order.) + if (numFieldsRemaining > numFieldsRemainingThreshold || + numFieldsRemaining != numComputedFieldsRemaining) { + for (; idx < objRoot->size(); ++idx) { auto sv = StringData(objRoot->field(idx)); + size_t pos = fieldNames.findPos(sv); - bool isProjectedOrRestricted; - size_t pos = fieldsAndProjects.findPos(sv); if (pos == IndexedStringVector::npos) { - isProjectedOrRestricted = isInclusion; - } else if (pos < numFields) { - isProjectedOrRestricted = !isInclusion; + if (!isInclusion) { + auto [tag, val] = objRoot->getAt(idx); + bson::appendValueToBsonObj(bob, objRoot->field(idx), tag, val); + } + } else if (pos < numKeepOrDrops) { + --numFieldsRemaining; + + if (isInclusion) { + auto [tag, val] = objRoot->getAt(idx); + bson::appendValueToBsonObj(bob, objRoot->field(idx), tag, val); + } } else { - isProjectedOrRestricted = true; - } + --numFieldsRemaining; + --numComputedFieldsRemaining; - if (!isProjectedOrRestricted) { - auto [tag, val] = objRoot->getAt(idx); - bson::appendValueToBsonObj(bob, objRoot->field(idx), tag, val); - --nFieldsIfInclusion; + auto projectIdx = pos - numKeepOrDrops; + visited[projectIdx] = 1; + + size_t argIdx = stackOffset + projectIdx; + auto [_, tag, val] = getFromStack(argIdx); + bson::appendValueToBsonObj(bob, fieldNames[pos], tag, val); } - if (nFieldsIfInclusion == 0 && isInclusion) { + if (numFieldsRemaining <= numFieldsRemainingThreshold && + numFieldsRemaining == numComputedFieldsRemaining) { + ++idx; break; } } } + + // If isInclusion == false and 'be' has not reached the end of 'rootVal', copy over + // the remaining fields from 'rootVal' to the output object. + if (!isInclusion) { + for (; idx < objRoot->size(); ++idx) { + auto sv = StringData(objRoot->field(idx)); + auto [fieldTag, fieldVal] = objRoot->getAt(idx); + bson::appendValueToBsonObj(bob, sv, fieldTag, fieldVal); + } + } } } - for (size_t idx = numFields; idx < fieldsAndProjects.size(); ++idx) { - auto argIdx = startIdx + (idx - numFields); - auto [_, tag, val] = getFromStack(argIdx); - bson::appendValueToBsonObj(bob, fieldsAndProjects[idx], tag, val); + // Append the remaining computed fields (if any) to the output object. + if (numComputedFieldsRemaining > 0) { + for (size_t pos = numKeepOrDrops; pos < fieldNames.size(); ++pos) { + auto projectIdx = pos - numKeepOrDrops; + if (!visited[projectIdx]) { + size_t argIdx = stackOffset + projectIdx; + auto [_, tag, val] = getFromStack(argIdx); + bson::appendValueToBsonObj(bob, fieldNames[pos], tag, val); + } + } } bob.doneFast(); @@ -5418,9 +5511,9 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinMakeBsonObj(Arit auto mos = value::getMakeObjSpecView(mosVal); - const size_t startIdx = 2; + const int stackOffset = 2; - auto [tag, val] = produceBsonObject(mos, objTag, objVal, startIdx); + auto [tag, val] = produceBsonObject(mos, objTag, objVal, stackOffset); return {true, tag, val}; } diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h index 7b25ff7fb03..2deb8b0c907 100644 --- a/src/mongo/db/exec/sbe/vm/vm.h +++ b/src/mongo/db/exec/sbe/vm/vm.h @@ -1405,10 +1405,35 @@ private: TimeZone timezone, DayOfWeek startOfWeek); + /** + * produceBsonObject() takes a MakeObjSpec ('spec'), a root value ('rootTag' and 'rootVal'), + * and 0 or more "computed" values as inputs, it builds an output BSON object based on the + * instructions provided by 'spec' and based on the contents of 'root' and the computed input + * values, and then it returns the output object. (Note the computed input values are not + * directly passed in as C++ parameters -- instead the computed input values are passed via + * the VM's stack.) + * + * 'spec' provides two lists of field names: "keepOrDrop" fields and "computed" fields. These + * lists are disjoint and do not contain duplicates. The number of computed input values passed + * in by the caller on the VM stack must match the number of fields in the "computed" list. + * + * For each field F in the "computed" list, this method will retrieve the corresponding computed + * input value V from the VM stack and add {F,V} to the output object. + * + * If 'root' is not an object, it is ignored. Otherwise, for each field F in 'root' with value V + * that does not appear in the "computed" list, this method will copy {F,V} to the output object + * if either: (1) field F appears in the "keepOrDrop" list and 'spec->fieldBehavior == keep'; or + * (2) field F does _not_ appear in the "keepOrDrop" list and 'spec->fieldBehavior == drop'. If + * neither of these conditions are met, field F in 'root' will be ignored. + * + * For any two distinct fields F1 and F2 in the output object, if F1 is in 'root' and F2 does + * not appear before F1 in 'root', -OR- if both F1 and F2 are not in 'root' and F2 does not + * appear before F1 in the "computed" list, then F2 will appear after F1 in the output object. + */ std::pair<value::TypeTags, value::Value> produceBsonObject(const MakeObjSpec* mos, value::TypeTags rootTag, value::Value rootVal, - size_t startIdx); + int stackOffset); FastTuple<bool, value::TypeTags, value::Value> builtinSplit(ArityType arity); FastTuple<bool, value::TypeTags, value::Value> builtinDate(ArityType arity); |