From bd7423724f1cd2983ffa177e2ba538859ae4ff63 Mon Sep 17 00:00:00 2001 From: Steve Tarzia Date: Thu, 23 Jun 2022 22:08:29 +0000 Subject: SERVER-66418 fix bug in projection created during dependency analysis --- jstests/aggregation/bugs/server66418.js | 38 +++++++++++ .../db/exec/add_fields_projection_executor.cpp | 32 --------- src/mongo/db/pipeline/dependencies.cpp | 51 ++++++++++++-- src/mongo/db/pipeline/dependencies.h | 14 ++++ src/mongo/db/pipeline/dependencies_test.cpp | 79 ++++++++++++++++++++++ 5 files changed, 178 insertions(+), 36 deletions(-) create mode 100644 jstests/aggregation/bugs/server66418.js diff --git a/jstests/aggregation/bugs/server66418.js b/jstests/aggregation/bugs/server66418.js new file mode 100644 index 00000000000..9b8c960282a --- /dev/null +++ b/jstests/aggregation/bugs/server66418.js @@ -0,0 +1,38 @@ +// SERVER-66418 +// Bad projection created during dependency analysis due to string order assumption +(function() { +"use strict"; + +const coll = db[jsTest.name()]; +coll.drop(); + +coll.save({ + _id: 1, + type: 'PRODUCT', + status: 'VALID', + locale: { + en: 'INSTRUMENT PANEL', + es: 'INSTRUMENTOS DEL CUADRO', + fr: 'INSTRUMENT TABLEAU DE BORD', + } +}); + +// before SERVER-66418, this incorrectly threw a PathCollision error +coll.aggregate([ + {"$match": {"_id": 1}}, + {"$sort": {"_id": 1}}, + { + "$project": { + "designation": { + "$switch": { + "branches": [{ + "case": {"$eq": ["$type", "PRODUCT"]}, + "then": {"$ifNull": ["$locale.en-GB.name", "$locale.en.name"]} + }], + "default": {"$ifNull": ["$locale.en-GB", "$locale.en"]} + } + } + } + } +]); +})(); diff --git a/src/mongo/db/exec/add_fields_projection_executor.cpp b/src/mongo/db/exec/add_fields_projection_executor.cpp index 3c7dcb98134..c6d2f707987 100644 --- a/src/mongo/db/exec/add_fields_projection_executor.cpp +++ b/src/mongo/db/exec/add_fields_projection_executor.cpp @@ -92,38 +92,6 @@ private: // The original object. Used to generate more helpful error messages. const BSONObj& _rawObj; - // Custom comparator that orders fieldpath strings by path prefix first, then by field. - struct PathPrefixComparator { - static constexpr char dot = '.'; - - // Returns true if the lhs value should sort before the rhs, false otherwise. - bool operator()(const std::string& lhs, const std::string& rhs) const { - for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) { - auto &lchar = lhs[pos], &rchar = rhs[pos]; - if (lchar == rchar) { - continue; - } - - // Consider the path delimiter '.' as being less than all other characters, so that - // paths sort directly before any paths they prefix and directly after any paths - // which prefix them. - if (lchar == dot) { - return true; - } else if (rchar == dot) { - return false; - } - - // Otherwise, default to normal character comparison. - return lchar < rchar; - } - - // If we get here, then we have reached the end of lhs and/or rhs and all of their path - // segments up to this point match. If lhs is shorter than rhs, then lhs prefixes rhs - // and should sort before it. - return lhs.size() < rhs.size(); - } - }; - // Tracks which paths we've seen to ensure no two paths conflict with each other. std::set _seenPaths; }; diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp index c53fe60e2c1..8107322f112 100644 --- a/src/mongo/db/pipeline/dependencies.cpp +++ b/src/mongo/db/pipeline/dependencies.cpp @@ -37,6 +37,13 @@ namespace mongo { +std::list DepsTracker::sortedFields() const { + // Use a special comparator to put parent fieldpaths before their children. + std::list sortedFields(fields.begin(), fields.end()); + sortedFields.sort(PathPrefixComparator()); + return sortedFields; +} + BSONObj DepsTracker::toProjectionWithoutMetadata() const { BSONObjBuilder bb; @@ -51,17 +58,21 @@ BSONObj DepsTracker::toProjectionWithoutMetadata() const { return bb.obj(); } + // Go through dependency fieldpaths to find the minimal set of projections that cover the + // dependencies. For example, the dependencies ["a.b", "a.b.c.g", "c", "c.d", "f"] would be + // minimally covered by the projection {"a.b": 1, "c": 1, "f": 1}. The key operation here is + // folding dependencies into ancestor dependencies, wherever possible. This is assisted by a + // special sort in DepsTracker::sortedFields that treats '.' as the first char and thus places + // parent paths directly before their children. bool idSpecified = false; std::string last; - for (const auto& field : fields) { + for (const auto& field : sortedFields()) { if (str::startsWith(field, "_id") && (field.size() == 3 || field[3] == '.')) { idSpecified = true; } if (!last.empty() && str::startsWith(field, last)) { - // we are including a parent of *it so we don't need to include this field - // explicitly. This logic relies on on set iterators going in lexicographic order so - // that a string is always directly before of all fields it prefixes. + // We are including a parent of this field, so we can skip this field. continue; } @@ -92,4 +103,36 @@ void DepsTracker::setNeedsMetadata(DocumentMetadataFields::MetaType type, bool r invariant(required || !_metadataDeps[type]); _metadataDeps[type] = required; } + +// Returns true if the lhs value should sort before the rhs, false otherwise. +bool PathPrefixComparator::operator()(const std::string& lhs, const std::string& rhs) const { + constexpr char dot = '.'; + + for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) { + // Below, we explicitly choose unsigned char because the usual const char& returned by + // operator[] is actually signed on x86 and will incorrectly order unicode characters. + unsigned char lchar = lhs[pos], rchar = rhs[pos]; + if (lchar == rchar) { + continue; + } + + // Consider the path delimiter '.' as being less than all other characters, so that + // paths sort directly before any paths they prefix and directly after any paths + // which prefix them. + if (lchar == dot) { + return true; + } else if (rchar == dot) { + return false; + } + + // Otherwise, default to normal character comparison. + return lchar < rchar; + } + + // If we get here, then we have reached the end of lhs and/or rhs and all of their path + // segments up to this point match. If lhs is shorter than rhs, then lhs prefixes rhs + // and should sort before it. + return lhs.size() < rhs.size(); +} + } // namespace mongo diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h index 8b75439db53..010f7e32d3b 100644 --- a/src/mongo/db/pipeline/dependencies.h +++ b/src/mongo/db/pipeline/dependencies.h @@ -173,6 +173,11 @@ struct DepsTracker { } } + /** + * Return fieldpaths ordered such that a parent is immediately before its children. + */ + std::list sortedFields() const; + std::set fields; // Names of needed fields in dotted notation. std::set vars; // IDs of referenced variables. bool needWholeDocument = false; // If true, ignore 'fields'; the whole document is needed. @@ -185,4 +190,13 @@ private: // dependency analysis. QueryMetadataBitSet _metadataDeps; }; + + +/** Custom comparator that orders fieldpath strings by path prefix first, then by field. + * This ensures that a parent field is ordered directly before its children. + */ +struct PathPrefixComparator { + /* Returns true if the lhs value should sort before the rhs, false otherwise. */ + bool operator()(const std::string& lhs, const std::string& rhs) const; +}; } // namespace mongo diff --git a/src/mongo/db/pipeline/dependencies_test.cpp b/src/mongo/db/pipeline/dependencies_test.cpp index 58e1066d158..7b9a6f50794 100644 --- a/src/mongo/db/pipeline/dependencies_test.cpp +++ b/src/mongo/db/pipeline/dependencies_test.cpp @@ -83,6 +83,13 @@ TEST(DependenciesToProjectionTest, ShouldNotIncludeSubFieldIfTopLevelAlreadyIncl ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(), BSON("a" << 1 << "b" << 1 << "_id" << 0)); } +TEST(DependenciesToProjectionTest, ExcludeIndirectDescendants) { + const char* array[] = {"a.b", "_id", "a.b.c.d.e"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(), BSON("_id" << 1 << "a.b" << 1)); +} + TEST(DependenciesToProjectionTest, ShouldIncludeIdIfNeeded) { const char* array[] = {"a", "_id"}; DepsTracker deps; @@ -120,6 +127,27 @@ TEST(DependenciesToProjectionTest, ShouldIncludeFieldPrefixedByIdWhenIdSubfieldI BSON("_id.a" << 1 << "_id_a" << 1 << "a" << 1)); } +// SERVER-66418 +TEST(DependenciesToProjectionTest, ChildCoveredByParentWithSpecialChars) { + // without "_id" + { + // This is an important test case because '-' is one of the few chars before '.' in utf-8. + const char* array[] = {"a", "a-b", "a.b"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(), + BSON("a" << 1 << "a-b" << 1 << "_id" << 0)); + } + // with "_id" + { + const char* array[] = {"_id", "a", "a-b", "a.b"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(), + BSON("_id" << 1 << "a" << 1 << "a-b" << 1)); + } +} + TEST(DependenciesToProjectionTest, ShouldOutputEmptyObjectIfEntireDocumentNeeded) { const char* array[] = {"a"}; // fields ignored with needWholeDocument DepsTracker deps; @@ -180,5 +208,56 @@ TEST(DependenciesToProjectionTest, ASSERT_TRUE(deps.metadataDeps()[DocumentMetadataFields::kTextScore]); } +TEST(DependenciesToProjectionTest, SortFieldPaths) { + const char* array[] = {"", + "A", + "_id", + "a", + "a.b", + "a.b.c", + "a.c", + // '-' char in utf-8 comes before '.' but our special fieldpath sort + // puts '.' first so that children directly follow their parents. + "a-b", + "a-b.ear", + "a-bear", + "a-bear.", + "a🌲", + "b", + "b.a", + "b.aa", + "b.🌲d"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + // our custom sort will restore the ordering above + std::list fieldPathSorted = deps.sortedFields(); + auto itr = fieldPathSorted.begin(); + for (unsigned long i = 0; i < fieldPathSorted.size(); i++) { + ASSERT_EQ(*itr, array[i]); + ++itr; + } +} + +TEST(DependenciesToProjectionTest, PathLessThan) { + auto lessThan = PathPrefixComparator(); + ASSERT_FALSE(lessThan("a", "a")); + ASSERT_TRUE(lessThan("a", "aa")); + ASSERT_TRUE(lessThan("a", "b")); + ASSERT_TRUE(lessThan("", "a")); + ASSERT_TRUE(lessThan("Aa", "aa")); + ASSERT_TRUE(lessThan("a.b", "ab")); + ASSERT_TRUE(lessThan("a.b", "a-b")); // SERVER-66418 + ASSERT_TRUE(lessThan("a.b", "a b")); // SERVER-66418 + // verify the difference from the standard sort + ASSERT_TRUE(std::string("a.b") > std::string("a-b")); + ASSERT_TRUE(std::string("a.b") > std::string("a b")); + // test unicode behavior + ASSERT_TRUE(lessThan("a.b", "a🌲")); + ASSERT_TRUE(lessThan("a.b", "a🌲b")); + ASSERT_TRUE(lessThan("🌲", "🌳")); // U+1F332 < U+1F333 + ASSERT_TRUE(lessThan("🌲", "🌲.b")); + ASSERT_FALSE(lessThan("🌲.b", "🌲")); +} + } // namespace } // namespace mongo -- cgit v1.2.1