summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorDrew Paroski <drew.paroski@mongodb.com>2023-04-18 03:26:55 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-04-27 21:24:03 +0000
commit0145677e42d72b24910401f06da986c767738914 (patch)
treebf11371090b38f0ea99a648d1b180197a2dc4f8b /src/mongo
parent07b38e091b48acd305469d525b81aebf3aeadbf1 (diff)
downloadmongo-0145677e42d72b24910401f06da986c767738914.tar.gz
SERVER-76510 Make makeBsonObj() preserve field order
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/exec/sbe/makeobj_spec.cpp2
-rw-r--r--src/mongo/db/exec/sbe/makeobj_spec.h24
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.cpp183
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.h27
4 files changed, 177 insertions, 59 deletions
diff --git a/src/mongo/db/exec/sbe/makeobj_spec.cpp b/src/mongo/db/exec/sbe/makeobj_spec.cpp
index e23faac3dcf..f6b037b5623 100644
--- a/src/mongo/db/exec/sbe/makeobj_spec.cpp
+++ b/src/mongo/db/exec/sbe/makeobj_spec.cpp
@@ -42,7 +42,7 @@ IndexedStringVector MakeObjSpec::buildIndexedFieldVector(std::vector<std::string
size_t MakeObjSpec::getApproximateSize() const {
auto size = sizeof(MakeObjSpec);
- size += size_estimator::estimate(fieldsAndProjects);
+ size += size_estimator::estimate(fieldNames);
return size;
}
} // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/makeobj_spec.h b/src/mongo/db/exec/sbe/makeobj_spec.h
index 2c488fa176b..b335b630a7f 100644
--- a/src/mongo/db/exec/sbe/makeobj_spec.h
+++ b/src/mongo/db/exec/sbe/makeobj_spec.h
@@ -50,34 +50,34 @@ struct MakeObjSpec {
std::vector<std::string> fields,
std::vector<std::string> projects)
: fieldBehavior(fieldBehavior),
- numFields(fields.size()),
- fieldsAndProjects(buildIndexedFieldVector(std::move(fields), std::move(projects))) {}
+ numKeepOrDrops(fields.size()),
+ fieldNames(buildIndexedFieldVector(std::move(fields), std::move(projects))) {}
MakeObjSpec(const MakeObjSpec& other)
: fieldBehavior(other.fieldBehavior),
- numFields(other.numFields),
- fieldsAndProjects(other.fieldsAndProjects) {}
+ numKeepOrDrops(other.numKeepOrDrops),
+ fieldNames(other.fieldNames) {}
MakeObjSpec(MakeObjSpec&& other)
: fieldBehavior(other.fieldBehavior),
- numFields(other.numFields),
- fieldsAndProjects(std::move(other.fieldsAndProjects)) {}
+ numKeepOrDrops(other.numKeepOrDrops),
+ fieldNames(std::move(other.fieldNames)) {}
std::string toString() const {
StringBuilder builder;
builder << (fieldBehavior == MakeObjSpec::FieldBehavior::keep ? "keep" : "drop") << ", [";
- for (size_t i = 0; i < fieldsAndProjects.size(); ++i) {
- if (i == numFields) {
+ for (size_t i = 0; i < fieldNames.size(); ++i) {
+ if (i == numKeepOrDrops) {
builder << "], [";
} else if (i != 0) {
builder << ", ";
}
- builder << '"' << fieldsAndProjects[i] << '"';
+ builder << '"' << fieldNames[i] << '"';
}
- if (fieldsAndProjects.size() == numFields) {
+ if (fieldNames.size() == numKeepOrDrops) {
builder << "], [";
}
@@ -89,7 +89,7 @@ struct MakeObjSpec {
size_t getApproximateSize() const;
FieldBehavior fieldBehavior;
- size_t numFields = 0;
- IndexedStringVector fieldsAndProjects;
+ size_t numKeepOrDrops = 0;
+ IndexedStringVector fieldNames;
};
} // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index 4e533214f97..97276df8c71 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -5315,88 +5315,181 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinSortKeyComponent
}
}
-std::pair<value::TypeTags, value::Value> ByteCode::produceBsonObject(const MakeObjSpec* mos,
+std::pair<value::TypeTags, value::Value> ByteCode::produceBsonObject(const MakeObjSpec* spec,
value::TypeTags rootTag,
value::Value rootVal,
- const size_t startIdx) {
- auto& fieldBehavior = mos->fieldBehavior;
- auto& fieldsAndProjects = mos->fieldsAndProjects;
- auto numFields = mos->numFields;
+ int stackOffset) {
+ auto& fieldNames = spec->fieldNames;
+
+ const bool isInclusion = spec->fieldBehavior == MakeObjSpec::FieldBehavior::keep;
+ const size_t numFields = fieldNames.size();
+ const size_t numKeepOrDrops = spec->numKeepOrDrops;
+ const size_t numComputedFields = numFields - numKeepOrDrops;
+
+ // The "visited" array keeps track of which computed fields have been visited so far so that
+ // later we can append the non-visited computed fields to the end of the object.
+ char* visited = nullptr;
+ char localVisitedArr[64];
+ std::unique_ptr<char[]> allocatedVisitedArr;
+ if (MONGO_unlikely(numComputedFields > 64)) {
+ allocatedVisitedArr = std::make_unique<char[]>(numComputedFields);
+ visited = allocatedVisitedArr.get();
+ } else {
+ visited = &localVisitedArr[0];
+ }
- const bool isInclusion = fieldBehavior == MakeObjSpec::FieldBehavior::keep;
+ memset(visited, 0, numComputedFields);
UniqueBSONObjBuilder bob;
- if (value::isObject(rootTag)) {
- size_t nFieldsIfInclusion = numFields;
+ size_t numFieldsRemaining = numFields;
+ size_t numComputedFieldsRemaining = numComputedFields;
+
+ const size_t numFieldsRemainingThreshold = isInclusion ? 1 : 0;
+ if (value::isObject(rootTag)) {
if (rootTag == value::TypeTags::bsonObject) {
- if (!(nFieldsIfInclusion == 0 && isInclusion)) {
- auto be = value::bitcastTo<const char*>(rootVal);
- const auto end = be + ConstDataView(be).read<LittleEndian<uint32_t>>();
+ auto be = value::bitcastTo<const char*>(rootVal);
+ const auto end = be + ConstDataView(be).read<LittleEndian<uint32_t>>();
- // Skip document length.
- be += 4;
+ // Skip document length.
+ be += 4;
+
+ // Let N = the # of "computed" fields, and let K = (isInclusion && N > 0 ? N-1 : N).
+ //
+ // When we have seen all of the "keepOrDrop" fields and when we have seen K of the
+ // "computed" fields, we can break out of this loop, ignore or copy the remaining
+ // fields from 'rootVal' (depending on whether 'isInclusion' is true or false), and
+ // then finally append the remaining computed field (if there is one) to the output
+ // object.
+ //
+ // (When isInclusion == true and a single "computed" field remains, it's okay to stop
+ // scanning 'rootVal' and append the remaining computed field to the end of the output
+ // object because it will have no observable effect on field order.)
+ if (numFieldsRemaining > numFieldsRemainingThreshold ||
+ numFieldsRemaining != numComputedFieldsRemaining) {
while (be != end - 1) {
auto sv = bson::fieldNameAndLength(be);
auto nextBe = bson::advance(be, sv.size());
+ size_t pos = fieldNames.findPos(sv);
- bool isProjectedOrRestricted;
- size_t pos = fieldsAndProjects.findPos(sv);
if (pos == IndexedStringVector::npos) {
- isProjectedOrRestricted = isInclusion;
- } else if (pos < numFields) {
- isProjectedOrRestricted = !isInclusion;
+ if (!isInclusion) {
+ bob.append(BSONElement(be, sv.size() + 1, nextBe - be));
+ }
+ } else if (pos < numKeepOrDrops) {
+ --numFieldsRemaining;
+
+ if (isInclusion) {
+ bob.append(BSONElement(be, sv.size() + 1, nextBe - be));
+ }
} else {
- isProjectedOrRestricted = true;
- }
+ --numFieldsRemaining;
+ --numComputedFieldsRemaining;
+
+ auto projectIdx = pos - numKeepOrDrops;
+ visited[projectIdx] = 1;
- if (!isProjectedOrRestricted) {
- bob.append(BSONElement(be, sv.size() + 1, nextBe - be));
- --nFieldsIfInclusion;
+ size_t argIdx = stackOffset + projectIdx;
+ auto [_, tag, val] = getFromStack(argIdx);
+ bson::appendValueToBsonObj(bob, fieldNames[pos], tag, val);
}
- if (nFieldsIfInclusion == 0 && isInclusion) {
+ be = nextBe;
+
+ if (numFieldsRemaining <= numFieldsRemainingThreshold &&
+ numFieldsRemaining == numComputedFieldsRemaining) {
break;
}
+ }
+ }
+
+ // If isInclusion == false and 'be' has not reached the end of 'rootVal', copy over
+ // the remaining fields from 'rootVal' to the output object.
+ if (!isInclusion) {
+ while (be != end - 1) {
+ auto sv = bson::fieldNameAndLength(be);
+ auto nextBe = bson::advance(be, sv.size());
+ bob.append(BSONElement(be, sv.size() + 1, nextBe - be));
be = nextBe;
}
}
} else if (rootTag == value::TypeTags::Object) {
- if (!(nFieldsIfInclusion == 0 && isInclusion)) {
- auto objRoot = value::getObjectView(rootVal);
- for (size_t idx = 0; idx < objRoot->size(); ++idx) {
+ auto objRoot = value::getObjectView(rootVal);
+ size_t idx = 0;
+
+ // Let N = number of "computed" fields, and let K = (isInclusion && N > 0 ? N-1 : N).
+ //
+ // When we have seen all of the "keepOrDrop" fields and when we have seen K of the
+ // "computed" fields, we can break out of this loop, ignore or copy the remaining
+ // fields from 'rootVal' (depending on whether 'isInclusion' is true or false), and
+ // then finally append the remaining computed field (if there is one) to the output
+ // object.
+ //
+ // (When isInclusion == true and a single "computed" field remains, it's okay to stop
+ // scanning 'rootVal' and append the remaining computed field to the end of the output
+ // object because it will have no observable effect on field order.)
+ if (numFieldsRemaining > numFieldsRemainingThreshold ||
+ numFieldsRemaining != numComputedFieldsRemaining) {
+ for (; idx < objRoot->size(); ++idx) {
auto sv = StringData(objRoot->field(idx));
+ size_t pos = fieldNames.findPos(sv);
- bool isProjectedOrRestricted;
- size_t pos = fieldsAndProjects.findPos(sv);
if (pos == IndexedStringVector::npos) {
- isProjectedOrRestricted = isInclusion;
- } else if (pos < numFields) {
- isProjectedOrRestricted = !isInclusion;
+ if (!isInclusion) {
+ auto [tag, val] = objRoot->getAt(idx);
+ bson::appendValueToBsonObj(bob, objRoot->field(idx), tag, val);
+ }
+ } else if (pos < numKeepOrDrops) {
+ --numFieldsRemaining;
+
+ if (isInclusion) {
+ auto [tag, val] = objRoot->getAt(idx);
+ bson::appendValueToBsonObj(bob, objRoot->field(idx), tag, val);
+ }
} else {
- isProjectedOrRestricted = true;
- }
+ --numFieldsRemaining;
+ --numComputedFieldsRemaining;
- if (!isProjectedOrRestricted) {
- auto [tag, val] = objRoot->getAt(idx);
- bson::appendValueToBsonObj(bob, objRoot->field(idx), tag, val);
- --nFieldsIfInclusion;
+ auto projectIdx = pos - numKeepOrDrops;
+ visited[projectIdx] = 1;
+
+ size_t argIdx = stackOffset + projectIdx;
+ auto [_, tag, val] = getFromStack(argIdx);
+ bson::appendValueToBsonObj(bob, fieldNames[pos], tag, val);
}
- if (nFieldsIfInclusion == 0 && isInclusion) {
+ if (numFieldsRemaining <= numFieldsRemainingThreshold &&
+ numFieldsRemaining == numComputedFieldsRemaining) {
+ ++idx;
break;
}
}
}
+
+ // If isInclusion == false and 'be' has not reached the end of 'rootVal', copy over
+ // the remaining fields from 'rootVal' to the output object.
+ if (!isInclusion) {
+ for (; idx < objRoot->size(); ++idx) {
+ auto sv = StringData(objRoot->field(idx));
+ auto [fieldTag, fieldVal] = objRoot->getAt(idx);
+ bson::appendValueToBsonObj(bob, sv, fieldTag, fieldVal);
+ }
+ }
}
}
- for (size_t idx = numFields; idx < fieldsAndProjects.size(); ++idx) {
- auto argIdx = startIdx + (idx - numFields);
- auto [_, tag, val] = getFromStack(argIdx);
- bson::appendValueToBsonObj(bob, fieldsAndProjects[idx], tag, val);
+ // Append the remaining computed fields (if any) to the output object.
+ if (numComputedFieldsRemaining > 0) {
+ for (size_t pos = numKeepOrDrops; pos < fieldNames.size(); ++pos) {
+ auto projectIdx = pos - numKeepOrDrops;
+ if (!visited[projectIdx]) {
+ size_t argIdx = stackOffset + projectIdx;
+ auto [_, tag, val] = getFromStack(argIdx);
+ bson::appendValueToBsonObj(bob, fieldNames[pos], tag, val);
+ }
+ }
}
bob.doneFast();
@@ -5418,9 +5511,9 @@ FastTuple<bool, value::TypeTags, value::Value> ByteCode::builtinMakeBsonObj(Arit
auto mos = value::getMakeObjSpecView(mosVal);
- const size_t startIdx = 2;
+ const int stackOffset = 2;
- auto [tag, val] = produceBsonObject(mos, objTag, objVal, startIdx);
+ auto [tag, val] = produceBsonObject(mos, objTag, objVal, stackOffset);
return {true, tag, val};
}
diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h
index 7b25ff7fb03..2deb8b0c907 100644
--- a/src/mongo/db/exec/sbe/vm/vm.h
+++ b/src/mongo/db/exec/sbe/vm/vm.h
@@ -1405,10 +1405,35 @@ private:
TimeZone timezone,
DayOfWeek startOfWeek);
+ /**
+ * produceBsonObject() takes a MakeObjSpec ('spec'), a root value ('rootTag' and 'rootVal'),
+ * and 0 or more "computed" values as inputs, it builds an output BSON object based on the
+ * instructions provided by 'spec' and based on the contents of 'root' and the computed input
+ * values, and then it returns the output object. (Note the computed input values are not
+ * directly passed in as C++ parameters -- instead the computed input values are passed via
+ * the VM's stack.)
+ *
+ * 'spec' provides two lists of field names: "keepOrDrop" fields and "computed" fields. These
+ * lists are disjoint and do not contain duplicates. The number of computed input values passed
+ * in by the caller on the VM stack must match the number of fields in the "computed" list.
+ *
+ * For each field F in the "computed" list, this method will retrieve the corresponding computed
+ * input value V from the VM stack and add {F,V} to the output object.
+ *
+ * If 'root' is not an object, it is ignored. Otherwise, for each field F in 'root' with value V
+ * that does not appear in the "computed" list, this method will copy {F,V} to the output object
+ * if either: (1) field F appears in the "keepOrDrop" list and 'spec->fieldBehavior == keep'; or
+ * (2) field F does _not_ appear in the "keepOrDrop" list and 'spec->fieldBehavior == drop'. If
+ * neither of these conditions are met, field F in 'root' will be ignored.
+ *
+ * For any two distinct fields F1 and F2 in the output object, if F1 is in 'root' and F2 does
+ * not appear before F1 in 'root', -OR- if both F1 and F2 are not in 'root' and F2 does not
+ * appear before F1 in the "computed" list, then F2 will appear after F1 in the output object.
+ */
std::pair<value::TypeTags, value::Value> produceBsonObject(const MakeObjSpec* mos,
value::TypeTags rootTag,
value::Value rootVal,
- size_t startIdx);
+ int stackOffset);
FastTuple<bool, value::TypeTags, value::Value> builtinSplit(ArityType arity);
FastTuple<bool, value::TypeTags, value::Value> builtinDate(ArityType arity);