summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteve Tarzia <steve.tarzia@mongodb.com>2022-06-23 22:08:29 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-06-23 22:31:56 +0000
commitbd7423724f1cd2983ffa177e2ba538859ae4ff63 (patch)
tree185e870dd67dad96938f8ce1d94a187568a75b3c
parentead0d926edbbcbb795776a2f05f8e4245920c8ff (diff)
downloadmongo-bd7423724f1cd2983ffa177e2ba538859ae4ff63.tar.gz
SERVER-66418 fix bug in projection created during dependency analysis
-rw-r--r--jstests/aggregation/bugs/server66418.js38
-rw-r--r--src/mongo/db/exec/add_fields_projection_executor.cpp32
-rw-r--r--src/mongo/db/pipeline/dependencies.cpp51
-rw-r--r--src/mongo/db/pipeline/dependencies.h14
-rw-r--r--src/mongo/db/pipeline/dependencies_test.cpp79
5 files changed, 178 insertions, 36 deletions
diff --git a/jstests/aggregation/bugs/server66418.js b/jstests/aggregation/bugs/server66418.js
new file mode 100644
index 00000000000..9b8c960282a
--- /dev/null
+++ b/jstests/aggregation/bugs/server66418.js
@@ -0,0 +1,38 @@
+// SERVER-66418
+// Bad projection created during dependency analysis due to string order assumption
+(function() {
+"use strict";
+
+const coll = db[jsTest.name()];
+coll.drop();
+
+coll.save({
+ _id: 1,
+ type: 'PRODUCT',
+ status: 'VALID',
+ locale: {
+ en: 'INSTRUMENT PANEL',
+ es: 'INSTRUMENTOS DEL CUADRO',
+ fr: 'INSTRUMENT TABLEAU DE BORD',
+ }
+});
+
+// before SERVER-66418, this incorrectly threw a PathCollision error
+coll.aggregate([
+ {"$match": {"_id": 1}},
+ {"$sort": {"_id": 1}},
+ {
+ "$project": {
+ "designation": {
+ "$switch": {
+ "branches": [{
+ "case": {"$eq": ["$type", "PRODUCT"]},
+ "then": {"$ifNull": ["$locale.en-GB.name", "$locale.en.name"]}
+ }],
+ "default": {"$ifNull": ["$locale.en-GB", "$locale.en"]}
+ }
+ }
+ }
+ }
+]);
+})();
diff --git a/src/mongo/db/exec/add_fields_projection_executor.cpp b/src/mongo/db/exec/add_fields_projection_executor.cpp
index 3c7dcb98134..c6d2f707987 100644
--- a/src/mongo/db/exec/add_fields_projection_executor.cpp
+++ b/src/mongo/db/exec/add_fields_projection_executor.cpp
@@ -92,38 +92,6 @@ private:
// The original object. Used to generate more helpful error messages.
const BSONObj& _rawObj;
- // Custom comparator that orders fieldpath strings by path prefix first, then by field.
- struct PathPrefixComparator {
- static constexpr char dot = '.';
-
- // Returns true if the lhs value should sort before the rhs, false otherwise.
- bool operator()(const std::string& lhs, const std::string& rhs) const {
- for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) {
- auto &lchar = lhs[pos], &rchar = rhs[pos];
- if (lchar == rchar) {
- continue;
- }
-
- // Consider the path delimiter '.' as being less than all other characters, so that
- // paths sort directly before any paths they prefix and directly after any paths
- // which prefix them.
- if (lchar == dot) {
- return true;
- } else if (rchar == dot) {
- return false;
- }
-
- // Otherwise, default to normal character comparison.
- return lchar < rchar;
- }
-
- // If we get here, then we have reached the end of lhs and/or rhs and all of their path
- // segments up to this point match. If lhs is shorter than rhs, then lhs prefixes rhs
- // and should sort before it.
- return lhs.size() < rhs.size();
- }
- };
-
// Tracks which paths we've seen to ensure no two paths conflict with each other.
std::set<std::string, PathPrefixComparator> _seenPaths;
};
diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp
index c53fe60e2c1..8107322f112 100644
--- a/src/mongo/db/pipeline/dependencies.cpp
+++ b/src/mongo/db/pipeline/dependencies.cpp
@@ -37,6 +37,13 @@
namespace mongo {
+std::list<std::string> DepsTracker::sortedFields() const {
+ // Use a special comparator to put parent fieldpaths before their children.
+ std::list<std::string> sortedFields(fields.begin(), fields.end());
+ sortedFields.sort(PathPrefixComparator());
+ return sortedFields;
+}
+
BSONObj DepsTracker::toProjectionWithoutMetadata() const {
BSONObjBuilder bb;
@@ -51,17 +58,21 @@ BSONObj DepsTracker::toProjectionWithoutMetadata() const {
return bb.obj();
}
+ // Go through dependency fieldpaths to find the minimal set of projections that cover the
+ // dependencies. For example, the dependencies ["a.b", "a.b.c.g", "c", "c.d", "f"] would be
+ // minimally covered by the projection {"a.b": 1, "c": 1, "f": 1}. The key operation here is
+ // folding dependencies into ancestor dependencies, wherever possible. This is assisted by a
+ // special sort in DepsTracker::sortedFields that treats '.' as the first char and thus places
+ // parent paths directly before their children.
bool idSpecified = false;
std::string last;
- for (const auto& field : fields) {
+ for (const auto& field : sortedFields()) {
if (str::startsWith(field, "_id") && (field.size() == 3 || field[3] == '.')) {
idSpecified = true;
}
if (!last.empty() && str::startsWith(field, last)) {
- // we are including a parent of *it so we don't need to include this field
- // explicitly. This logic relies on on set iterators going in lexicographic order so
- // that a string is always directly before of all fields it prefixes.
+ // We are including a parent of this field, so we can skip this field.
continue;
}
@@ -92,4 +103,36 @@ void DepsTracker::setNeedsMetadata(DocumentMetadataFields::MetaType type, bool r
invariant(required || !_metadataDeps[type]);
_metadataDeps[type] = required;
}
+
+// Returns true if the lhs value should sort before the rhs, false otherwise.
+bool PathPrefixComparator::operator()(const std::string& lhs, const std::string& rhs) const {
+ constexpr char dot = '.';
+
+ for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) {
+ // Below, we explicitly choose unsigned char because the usual const char& returned by
+ // operator[] is actually signed on x86 and will incorrectly order unicode characters.
+ unsigned char lchar = lhs[pos], rchar = rhs[pos];
+ if (lchar == rchar) {
+ continue;
+ }
+
+ // Consider the path delimiter '.' as being less than all other characters, so that
+ // paths sort directly before any paths they prefix and directly after any paths
+ // which prefix them.
+ if (lchar == dot) {
+ return true;
+ } else if (rchar == dot) {
+ return false;
+ }
+
+ // Otherwise, default to normal character comparison.
+ return lchar < rchar;
+ }
+
+ // If we get here, then we have reached the end of lhs and/or rhs and all of their path
+ // segments up to this point match. If lhs is shorter than rhs, then lhs prefixes rhs
+ // and should sort before it.
+ return lhs.size() < rhs.size();
+}
+
} // namespace mongo
diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h
index 8b75439db53..010f7e32d3b 100644
--- a/src/mongo/db/pipeline/dependencies.h
+++ b/src/mongo/db/pipeline/dependencies.h
@@ -173,6 +173,11 @@ struct DepsTracker {
}
}
+ /**
+ * Return fieldpaths ordered such that a parent is immediately before its children.
+ */
+ std::list<std::string> sortedFields() const;
+
std::set<std::string> fields; // Names of needed fields in dotted notation.
std::set<Variables::Id> vars; // IDs of referenced variables.
bool needWholeDocument = false; // If true, ignore 'fields'; the whole document is needed.
@@ -185,4 +190,13 @@ private:
// dependency analysis.
QueryMetadataBitSet _metadataDeps;
};
+
+
+/** Custom comparator that orders fieldpath strings by path prefix first, then by field.
+ * This ensures that a parent field is ordered directly before its children.
+ */
+struct PathPrefixComparator {
+ /* Returns true if the lhs value should sort before the rhs, false otherwise. */
+ bool operator()(const std::string& lhs, const std::string& rhs) const;
+};
} // namespace mongo
diff --git a/src/mongo/db/pipeline/dependencies_test.cpp b/src/mongo/db/pipeline/dependencies_test.cpp
index 58e1066d158..7b9a6f50794 100644
--- a/src/mongo/db/pipeline/dependencies_test.cpp
+++ b/src/mongo/db/pipeline/dependencies_test.cpp
@@ -83,6 +83,13 @@ TEST(DependenciesToProjectionTest, ShouldNotIncludeSubFieldIfTopLevelAlreadyIncl
ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(), BSON("a" << 1 << "b" << 1 << "_id" << 0));
}
+TEST(DependenciesToProjectionTest, ExcludeIndirectDescendants) {
+ const char* array[] = {"a.b", "_id", "a.b.c.d.e"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(), BSON("_id" << 1 << "a.b" << 1));
+}
+
TEST(DependenciesToProjectionTest, ShouldIncludeIdIfNeeded) {
const char* array[] = {"a", "_id"};
DepsTracker deps;
@@ -120,6 +127,27 @@ TEST(DependenciesToProjectionTest, ShouldIncludeFieldPrefixedByIdWhenIdSubfieldI
BSON("_id.a" << 1 << "_id_a" << 1 << "a" << 1));
}
+// SERVER-66418
+TEST(DependenciesToProjectionTest, ChildCoveredByParentWithSpecialChars) {
+ // without "_id"
+ {
+ // This is an important test case because '-' is one of the few chars before '.' in utf-8.
+ const char* array[] = {"a", "a-b", "a.b"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(),
+ BSON("a" << 1 << "a-b" << 1 << "_id" << 0));
+ }
+ // with "_id"
+ {
+ const char* array[] = {"_id", "a", "a-b", "a.b"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(),
+ BSON("_id" << 1 << "a" << 1 << "a-b" << 1));
+ }
+}
+
TEST(DependenciesToProjectionTest, ShouldOutputEmptyObjectIfEntireDocumentNeeded) {
const char* array[] = {"a"}; // fields ignored with needWholeDocument
DepsTracker deps;
@@ -180,5 +208,56 @@ TEST(DependenciesToProjectionTest,
ASSERT_TRUE(deps.metadataDeps()[DocumentMetadataFields::kTextScore]);
}
+TEST(DependenciesToProjectionTest, SortFieldPaths) {
+ const char* array[] = {"",
+ "A",
+ "_id",
+ "a",
+ "a.b",
+ "a.b.c",
+ "a.c",
+ // '-' char in utf-8 comes before '.' but our special fieldpath sort
+ // puts '.' first so that children directly follow their parents.
+ "a-b",
+ "a-b.ear",
+ "a-bear",
+ "a-bear.",
+ "a🌲",
+ "b",
+ "b.a",
+ "b.aa",
+ "b.🌲d"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ // our custom sort will restore the ordering above
+ std::list<std::string> fieldPathSorted = deps.sortedFields();
+ auto itr = fieldPathSorted.begin();
+ for (unsigned long i = 0; i < fieldPathSorted.size(); i++) {
+ ASSERT_EQ(*itr, array[i]);
+ ++itr;
+ }
+}
+
+TEST(DependenciesToProjectionTest, PathLessThan) {
+ auto lessThan = PathPrefixComparator();
+ ASSERT_FALSE(lessThan("a", "a"));
+ ASSERT_TRUE(lessThan("a", "aa"));
+ ASSERT_TRUE(lessThan("a", "b"));
+ ASSERT_TRUE(lessThan("", "a"));
+ ASSERT_TRUE(lessThan("Aa", "aa"));
+ ASSERT_TRUE(lessThan("a.b", "ab"));
+ ASSERT_TRUE(lessThan("a.b", "a-b")); // SERVER-66418
+ ASSERT_TRUE(lessThan("a.b", "a b")); // SERVER-66418
+ // verify the difference from the standard sort
+ ASSERT_TRUE(std::string("a.b") > std::string("a-b"));
+ ASSERT_TRUE(std::string("a.b") > std::string("a b"));
+ // test unicode behavior
+ ASSERT_TRUE(lessThan("a.b", "a🌲"));
+ ASSERT_TRUE(lessThan("a.b", "a🌲b"));
+ ASSERT_TRUE(lessThan("🌲", "🌳")); // U+1F332 < U+1F333
+ ASSERT_TRUE(lessThan("🌲", "🌲.b"));
+ ASSERT_FALSE(lessThan("🌲.b", "🌲"));
+}
+
} // namespace
} // namespace mongo