summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteve Tarzia <steve.tarzia@mongodb.com>2022-06-24 14:10:05 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-06-24 14:46:02 +0000
commit20ee02b3ba6a0d8a120eb4fdc947c92d71fd66bd (patch)
tree0faa8ea67dc4f7fade9874a2234d321151829fc8
parentffb583eb93ca33c34f9144900ffe9fe2cdeca857 (diff)
downloadmongo-20ee02b3ba6a0d8a120eb4fdc947c92d71fd66bd.tar.gz
SERVER-66418 fix bug in projection created during dependency analysis
-rw-r--r--jstests/aggregation/bugs/server66418.js38
-rw-r--r--src/mongo/db/pipeline/dependencies.cpp59
-rw-r--r--src/mongo/db/pipeline/dependencies.h14
-rw-r--r--src/mongo/db/pipeline/dependencies_test.cpp77
-rw-r--r--src/mongo/db/pipeline/parsed_aggregation_projection.h32
5 files changed, 179 insertions, 41 deletions
diff --git a/jstests/aggregation/bugs/server66418.js b/jstests/aggregation/bugs/server66418.js
new file mode 100644
index 00000000000..9b8c960282a
--- /dev/null
+++ b/jstests/aggregation/bugs/server66418.js
@@ -0,0 +1,38 @@
+// SERVER-66418
+// Bad projection created during dependency analysis due to string order assumption
+(function() {
+"use strict";
+
+const coll = db[jsTest.name()];
+coll.drop();
+
+coll.save({
+ _id: 1,
+ type: 'PRODUCT',
+ status: 'VALID',
+ locale: {
+ en: 'INSTRUMENT PANEL',
+ es: 'INSTRUMENTOS DEL CUADRO',
+ fr: 'INSTRUMENT TABLEAU DE BORD',
+ }
+});
+
+// before SERVER-66418, this incorrectly threw a PathCollision error
+coll.aggregate([
+ {"$match": {"_id": 1}},
+ {"$sort": {"_id": 1}},
+ {
+ "$project": {
+ "designation": {
+ "$switch": {
+ "branches": [{
+ "case": {"$eq": ["$type", "PRODUCT"]},
+ "then": {"$ifNull": ["$locale.en-GB.name", "$locale.en.name"]}
+ }],
+ "default": {"$ifNull": ["$locale.en-GB", "$locale.en"]}
+ }
+ }
+ }
+ }
+]);
+})();
diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp
index 8c92cd5c751..4e2cb214dd9 100644
--- a/src/mongo/db/pipeline/dependencies.cpp
+++ b/src/mongo/db/pipeline/dependencies.cpp
@@ -62,6 +62,13 @@ bool DepsTracker::_appendMetaProjections(BSONObjBuilder* projectionBuilder) cons
return (_needTextScore || _needSortKey || _needGeoNearDistance || _needGeoNearPoint);
}
+std::list<std::string> DepsTracker::sortedFields() const {
+ // Use a special comparator to put parent fieldpaths before their children.
+ std::list<std::string> sortedFields(fields.begin(), fields.end());
+ sortedFields.sort(PathPrefixComparator());
+ return sortedFields;
+}
+
BSONObj DepsTracker::toProjection() const {
BSONObjBuilder bb;
@@ -83,21 +90,23 @@ BSONObj DepsTracker::toProjection() const {
return bb.obj();
}
- bool needId = false;
+ // Go through dependency fieldpaths to find the minimal set of projections that cover the
+ // dependencies. For example, the dependencies ["a.b", "a.b.c.g", "c", "c.d", "f"] would be
+ // minimally covered by the projection {"a.b": 1, "c": 1, "f": 1}. The key operation here is
+ // folding dependencies into ancestor dependencies, wherever possible. This is assisted by a
+ // special sort in DepsTracker::sortedFields that treats '.' as the first char and thus places
+ // parent paths directly before their children.
+ bool idSpecified = false;
std::string last;
- for (const auto& field : fields) {
+ for (const auto& field : sortedFields()) {
if (str::startsWith(field, "_id") && (field.size() == 3 || field[3] == '.')) {
// _id and subfields are handled specially due in part to SERVER-7502
- needId = true;
+ idSpecified = true;
continue;
}
if (!last.empty() && str::startsWith(field, last)) {
- // we are including a parent of *it so we don't need to include this field
- // explicitly. In fact, due to SERVER-6527 if we included this field, the parent
- // wouldn't be fully included. This logic relies on on set iterators going in
- // lexicographic order so that a string is always directly before of all fields it
- // prefixes.
+ // We are including a parent of this field, so we can skip this field.
continue;
}
@@ -110,7 +119,7 @@ BSONObj DepsTracker::toProjection() const {
bb.append(field, 1);
}
- if (needId) // we are explicit either way
+ if (idSpecified) // we are explicit either way
bb.append("_id", 1);
else
bb.append("_id", 0);
@@ -287,4 +296,36 @@ Document documentHelper(const BSONObj& bson, const Document& neededFields, int n
Document ParsedDeps::extractFields(const BSONObj& input) const {
return documentHelper(input, _fields, _nFields);
}
+
+// Returns true if the lhs value should sort before the rhs, false otherwise.
+bool PathPrefixComparator::operator()(const std::string& lhs, const std::string& rhs) const {
+ constexpr char dot = '.';
+
+ for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) {
+ // Below, we explicitly choose unsigned char because the usual const char& returned by
+ // operator[] is actually signed on x86 and will incorrectly order unicode characters.
+ unsigned char lchar = lhs[pos], rchar = rhs[pos];
+ if (lchar == rchar) {
+ continue;
+ }
+
+ // Consider the path delimiter '.' as being less than all other characters, so that
+ // paths sort directly before any paths they prefix and directly after any paths
+ // which prefix them.
+ if (lchar == dot) {
+ return true;
+ } else if (rchar == dot) {
+ return false;
+ }
+
+ // Otherwise, default to normal character comparison.
+ return lchar < rchar;
+ }
+
+ // If we get here, then we have reached the end of lhs and/or rhs and all of their path
+ // segments up to this point match. If lhs is shorter than rhs, then lhs prefixes rhs
+ // and should sort before it.
+ return lhs.size() < rhs.size();
+}
+
} // namespace mongo
diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h
index 3487584a4a0..525ed567d8a 100644
--- a/src/mongo/db/pipeline/dependencies.h
+++ b/src/mongo/db/pipeline/dependencies.h
@@ -170,6 +170,11 @@ struct DepsTracker {
*/
std::vector<MetadataType> getAllRequiredMetadataTypes() const;
+ /**
+ * Return fieldpaths ordered such that a parent is immediately before its children.
+ */
+ std::list<std::string> sortedFields() const;
+
std::set<std::string> fields; // Names of needed fields in dotted notation.
std::set<Variables::Id> vars; // IDs of referenced variables.
bool needWholeDocument = false; // If true, ignore 'fields'; the whole document is needed.
@@ -205,4 +210,13 @@ private:
Document _fields;
int _nFields; // Cache the number of top-level fields needed.
};
+
+
+/** Custom comparator that orders fieldpath strings by path prefix first, then by field.
+ * This ensures that a parent field is ordered directly before its children.
+ */
+struct PathPrefixComparator {
+ /* Returns true if the lhs value should sort before the rhs, false otherwise. */
+ bool operator()(const std::string& lhs, const std::string& rhs) const;
+};
} // namespace mongo
diff --git a/src/mongo/db/pipeline/dependencies_test.cpp b/src/mongo/db/pipeline/dependencies_test.cpp
index 6d2741a78e4..dd8d62ad2c7 100644
--- a/src/mongo/db/pipeline/dependencies_test.cpp
+++ b/src/mongo/db/pipeline/dependencies_test.cpp
@@ -76,6 +76,13 @@ TEST(DependenciesToProjectionTest, ShouldNotIncludeSubFieldIfTopLevelAlreadyIncl
ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON("a" << 1 << "b" << 1 << "_id" << 0));
}
+TEST(DependenciesToProjectionTest, ExcludeIndirectDescendants) {
+ const char* array[] = {"a.b", "_id", "a.b.c.d.e"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON("a.b" << 1 << "_id" << 1));
+}
+
TEST(DependenciesToProjectionTest, ShouldIncludeIdIfNeeded) {
const char* array[] = {"a", "_id"};
DepsTracker deps;
@@ -104,6 +111,25 @@ TEST(DependenciesToProjectionTest, ShouldIncludeFieldPrefixedById) {
ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON("_id_a" << 1 << "a" << 1 << "_id" << 1));
}
+// SERVER-66418
+TEST(DependenciesToProjectionTest, ChildCoveredByParentWithSpecialChars) {
+ // without "_id"
+ {
+ // This is an important test case because '-' is one of the few chars before '.' in utf-8.
+ const char* array[] = {"a", "a-b", "a.b"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON("a" << 1 << "a-b" << 1 << "_id" << 0));
+ }
+ // with "_id"
+ {
+ const char* array[] = {"_id", "a", "a-b", "a.b"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON("a" << 1 << "a-b" << 1 << "_id" << 1));
+ }
+}
+
TEST(DependenciesToProjectionTest, ShouldOutputEmptyObjectIfEntireDocumentNeeded) {
const char* array[] = {"a"}; // fields ignored with needWholeDocument
DepsTracker deps;
@@ -159,5 +185,56 @@ TEST(DependenciesToProjectionTest,
ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON(Document::metaFieldTextScore << metaTextScore));
}
+TEST(DependenciesToProjectionTest, SortFieldPaths) {
+ const char* array[] = {"",
+ "A",
+ "_id",
+ "a",
+ "a.b",
+ "a.b.c",
+ "a.c",
+ // '-' char in utf-8 comes before '.' but our special fieldpath sort
+ // puts '.' first so that children directly follow their parents.
+ "a-b",
+ "a-b.ear",
+ "a-bear",
+ "a-bear.",
+ "a🌲",
+ "b",
+ "b.a"
+ "b.aa"
+ "b.🌲d"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ // our custom sort will restore the ordering above
+ std::list<std::string> fieldPathSorted = deps.sortedFields();
+ auto itr = fieldPathSorted.begin();
+ for (unsigned long i = 0; i < fieldPathSorted.size(); i++) {
+ ASSERT_EQ(*itr, array[i]);
+ ++itr;
+ }
+}
+
+TEST(DependenciesToProjectionTest, PathLessThan) {
+ auto lessThan = PathPrefixComparator();
+ ASSERT_FALSE(lessThan("a", "a"));
+ ASSERT_TRUE(lessThan("a", "aa"));
+ ASSERT_TRUE(lessThan("a", "b"));
+ ASSERT_TRUE(lessThan("", "a"));
+ ASSERT_TRUE(lessThan("Aa", "aa"));
+ ASSERT_TRUE(lessThan("a.b", "ab"));
+ ASSERT_TRUE(lessThan("a.b", "a-b")); // SERVER-66418
+ ASSERT_TRUE(lessThan("a.b", "a b")); // SERVER-66418
+ // verify the difference from the standard sort
+ ASSERT_TRUE(std::string("a.b") > std::string("a-b"));
+ ASSERT_TRUE(std::string("a.b") > std::string("a b"));
+ // test unicode behavior
+ ASSERT_TRUE(lessThan("a.b", "a🌲"));
+ ASSERT_TRUE(lessThan("a.b", "a🌲b"));
+ ASSERT_TRUE(lessThan("🌲", "🌳")); // U+1F332 < U+1F333
+ ASSERT_TRUE(lessThan("🌲", "🌲.b"));
+ ASSERT_FALSE(lessThan("🌲.b", "🌲"));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/pipeline/parsed_aggregation_projection.h b/src/mongo/db/pipeline/parsed_aggregation_projection.h
index 0542847e9ba..5ea116e7a58 100644
--- a/src/mongo/db/pipeline/parsed_aggregation_projection.h
+++ b/src/mongo/db/pipeline/parsed_aggregation_projection.h
@@ -98,38 +98,6 @@ private:
// The original object. Used to generate more helpful error messages.
const BSONObj& _rawObj;
- // Custom comparator that orders fieldpath strings by path prefix first, then by field.
- struct PathPrefixComparator {
- static constexpr char dot = '.';
-
- // Returns true if the lhs value should sort before the rhs, false otherwise.
- bool operator()(const std::string& lhs, const std::string& rhs) const {
- for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) {
- auto &lchar = lhs[pos], &rchar = rhs[pos];
- if (lchar == rchar) {
- continue;
- }
-
- // Consider the path delimiter '.' as being less than all other characters, so that
- // paths sort directly before any paths they prefix and directly after any paths
- // which prefix them.
- if (lchar == dot) {
- return true;
- } else if (rchar == dot) {
- return false;
- }
-
- // Otherwise, default to normal character comparison.
- return lchar < rchar;
- }
-
- // If we get here, then we have reached the end of lhs and/or rhs and all of their path
- // segments up to this point match. If lhs is shorter than rhs, then lhs prefixes rhs
- // and should sort before it.
- return lhs.size() < rhs.size();
- }
- };
-
// Tracks which paths we've seen to ensure no two paths conflict with each other.
std::set<std::string, PathPrefixComparator> _seenPaths;
};