diff options
-rw-r--r-- | jstests/aggregation/bugs/server66418.js | 38 | ||||
-rw-r--r-- | src/mongo/db/pipeline/dependencies.cpp | 59 | ||||
-rw-r--r-- | src/mongo/db/pipeline/dependencies.h | 14 | ||||
-rw-r--r-- | src/mongo/db/pipeline/dependencies_test.cpp | 77 | ||||
-rw-r--r-- | src/mongo/db/pipeline/parsed_aggregation_projection.h | 32 |
5 files changed, 179 insertions, 41 deletions
diff --git a/jstests/aggregation/bugs/server66418.js b/jstests/aggregation/bugs/server66418.js new file mode 100644 index 00000000000..9b8c960282a --- /dev/null +++ b/jstests/aggregation/bugs/server66418.js @@ -0,0 +1,38 @@ +// SERVER-66418 +// Bad projection created during dependency analysis due to string order assumption +(function() { +"use strict"; + +const coll = db[jsTest.name()]; +coll.drop(); + +coll.save({ + _id: 1, + type: 'PRODUCT', + status: 'VALID', + locale: { + en: 'INSTRUMENT PANEL', + es: 'INSTRUMENTOS DEL CUADRO', + fr: 'INSTRUMENT TABLEAU DE BORD', + } +}); + +// before SERVER-66418, this incorrectly threw a PathCollision error +coll.aggregate([ + {"$match": {"_id": 1}}, + {"$sort": {"_id": 1}}, + { + "$project": { + "designation": { + "$switch": { + "branches": [{ + "case": {"$eq": ["$type", "PRODUCT"]}, + "then": {"$ifNull": ["$locale.en-GB.name", "$locale.en.name"]} + }], + "default": {"$ifNull": ["$locale.en-GB", "$locale.en"]} + } + } + } + } +]); +})(); diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp index 8c92cd5c751..4e2cb214dd9 100644 --- a/src/mongo/db/pipeline/dependencies.cpp +++ b/src/mongo/db/pipeline/dependencies.cpp @@ -62,6 +62,13 @@ bool DepsTracker::_appendMetaProjections(BSONObjBuilder* projectionBuilder) cons return (_needTextScore || _needSortKey || _needGeoNearDistance || _needGeoNearPoint); } +std::list<std::string> DepsTracker::sortedFields() const { + // Use a special comparator to put parent fieldpaths before their children. + std::list<std::string> sortedFields(fields.begin(), fields.end()); + sortedFields.sort(PathPrefixComparator()); + return sortedFields; +} + BSONObj DepsTracker::toProjection() const { BSONObjBuilder bb; @@ -83,21 +90,23 @@ BSONObj DepsTracker::toProjection() const { return bb.obj(); } - bool needId = false; + // Go through dependency fieldpaths to find the minimal set of projections that cover the + // dependencies. For example, the dependencies ["a.b", "a.b.c.g", "c", "c.d", "f"] would be + // minimally covered by the projection {"a.b": 1, "c": 1, "f": 1}. The key operation here is + // folding dependencies into ancestor dependencies, wherever possible. This is assisted by a + // special sort in DepsTracker::sortedFields that treats '.' as the first char and thus places + // parent paths directly before their children. + bool idSpecified = false; std::string last; - for (const auto& field : fields) { + for (const auto& field : sortedFields()) { if (str::startsWith(field, "_id") && (field.size() == 3 || field[3] == '.')) { // _id and subfields are handled specially due in part to SERVER-7502 - needId = true; + idSpecified = true; continue; } if (!last.empty() && str::startsWith(field, last)) { - // we are including a parent of *it so we don't need to include this field - // explicitly. In fact, due to SERVER-6527 if we included this field, the parent - // wouldn't be fully included. This logic relies on on set iterators going in - // lexicographic order so that a string is always directly before of all fields it - // prefixes. + // We are including a parent of this field, so we can skip this field. continue; } @@ -110,7 +119,7 @@ BSONObj DepsTracker::toProjection() const { bb.append(field, 1); } - if (needId) // we are explicit either way + if (idSpecified) // we are explicit either way bb.append("_id", 1); else bb.append("_id", 0); @@ -287,4 +296,36 @@ Document documentHelper(const BSONObj& bson, const Document& neededFields, int n Document ParsedDeps::extractFields(const BSONObj& input) const { return documentHelper(input, _fields, _nFields); } + +// Returns true if the lhs value should sort before the rhs, false otherwise. +bool PathPrefixComparator::operator()(const std::string& lhs, const std::string& rhs) const { + constexpr char dot = '.'; + + for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) { + // Below, we explicitly choose unsigned char because the usual const char& returned by + // operator[] is actually signed on x86 and will incorrectly order unicode characters. + unsigned char lchar = lhs[pos], rchar = rhs[pos]; + if (lchar == rchar) { + continue; + } + + // Consider the path delimiter '.' as being less than all other characters, so that + // paths sort directly before any paths they prefix and directly after any paths + // which prefix them. + if (lchar == dot) { + return true; + } else if (rchar == dot) { + return false; + } + + // Otherwise, default to normal character comparison. + return lchar < rchar; + } + + // If we get here, then we have reached the end of lhs and/or rhs and all of their path + // segments up to this point match. If lhs is shorter than rhs, then lhs prefixes rhs + // and should sort before it. + return lhs.size() < rhs.size(); +} + } // namespace mongo diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h index 3487584a4a0..525ed567d8a 100644 --- a/src/mongo/db/pipeline/dependencies.h +++ b/src/mongo/db/pipeline/dependencies.h @@ -170,6 +170,11 @@ struct DepsTracker { */ std::vector<MetadataType> getAllRequiredMetadataTypes() const; + /** + * Return fieldpaths ordered such that a parent is immediately before its children. + */ + std::list<std::string> sortedFields() const; + std::set<std::string> fields; // Names of needed fields in dotted notation. std::set<Variables::Id> vars; // IDs of referenced variables. bool needWholeDocument = false; // If true, ignore 'fields'; the whole document is needed. @@ -205,4 +210,13 @@ private: Document _fields; int _nFields; // Cache the number of top-level fields needed. }; + + +/** Custom comparator that orders fieldpath strings by path prefix first, then by field. + * This ensures that a parent field is ordered directly before its children. + */ +struct PathPrefixComparator { + /* Returns true if the lhs value should sort before the rhs, false otherwise. */ + bool operator()(const std::string& lhs, const std::string& rhs) const; +}; } // namespace mongo diff --git a/src/mongo/db/pipeline/dependencies_test.cpp b/src/mongo/db/pipeline/dependencies_test.cpp index 6d2741a78e4..dd8d62ad2c7 100644 --- a/src/mongo/db/pipeline/dependencies_test.cpp +++ b/src/mongo/db/pipeline/dependencies_test.cpp @@ -76,6 +76,13 @@ TEST(DependenciesToProjectionTest, ShouldNotIncludeSubFieldIfTopLevelAlreadyIncl ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON("a" << 1 << "b" << 1 << "_id" << 0)); } +TEST(DependenciesToProjectionTest, ExcludeIndirectDescendants) { + const char* array[] = {"a.b", "_id", "a.b.c.d.e"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON("a.b" << 1 << "_id" << 1)); +} + TEST(DependenciesToProjectionTest, ShouldIncludeIdIfNeeded) { const char* array[] = {"a", "_id"}; DepsTracker deps; @@ -104,6 +111,25 @@ TEST(DependenciesToProjectionTest, ShouldIncludeFieldPrefixedById) { ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON("_id_a" << 1 << "a" << 1 << "_id" << 1)); } +// SERVER-66418 +TEST(DependenciesToProjectionTest, ChildCoveredByParentWithSpecialChars) { + // without "_id" + { + // This is an important test case because '-' is one of the few chars before '.' in utf-8. + const char* array[] = {"a", "a-b", "a.b"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON("a" << 1 << "a-b" << 1 << "_id" << 0)); + } + // with "_id" + { + const char* array[] = {"_id", "a", "a-b", "a.b"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON("a" << 1 << "a-b" << 1 << "_id" << 1)); + } +} + TEST(DependenciesToProjectionTest, ShouldOutputEmptyObjectIfEntireDocumentNeeded) { const char* array[] = {"a"}; // fields ignored with needWholeDocument DepsTracker deps; @@ -159,5 +185,56 @@ TEST(DependenciesToProjectionTest, ASSERT_BSONOBJ_EQ(deps.toProjection(), BSON(Document::metaFieldTextScore << metaTextScore)); } +TEST(DependenciesToProjectionTest, SortFieldPaths) { + const char* array[] = {"", + "A", + "_id", + "a", + "a.b", + "a.b.c", + "a.c", + // '-' char in utf-8 comes before '.' but our special fieldpath sort + // puts '.' first so that children directly follow their parents. + "a-b", + "a-b.ear", + "a-bear", + "a-bear.", + "a🌲", + "b", + "b.a" + "b.aa" + "b.🌲d"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + // our custom sort will restore the ordering above + std::list<std::string> fieldPathSorted = deps.sortedFields(); + auto itr = fieldPathSorted.begin(); + for (unsigned long i = 0; i < fieldPathSorted.size(); i++) { + ASSERT_EQ(*itr, array[i]); + ++itr; + } +} + +TEST(DependenciesToProjectionTest, PathLessThan) { + auto lessThan = PathPrefixComparator(); + ASSERT_FALSE(lessThan("a", "a")); + ASSERT_TRUE(lessThan("a", "aa")); + ASSERT_TRUE(lessThan("a", "b")); + ASSERT_TRUE(lessThan("", "a")); + ASSERT_TRUE(lessThan("Aa", "aa")); + ASSERT_TRUE(lessThan("a.b", "ab")); + ASSERT_TRUE(lessThan("a.b", "a-b")); // SERVER-66418 + ASSERT_TRUE(lessThan("a.b", "a b")); // SERVER-66418 + // verify the difference from the standard sort + ASSERT_TRUE(std::string("a.b") > std::string("a-b")); + ASSERT_TRUE(std::string("a.b") > std::string("a b")); + // test unicode behavior + ASSERT_TRUE(lessThan("a.b", "a🌲")); + ASSERT_TRUE(lessThan("a.b", "a🌲b")); + ASSERT_TRUE(lessThan("🌲", "🌳")); // U+1F332 < U+1F333 + ASSERT_TRUE(lessThan("🌲", "🌲.b")); + ASSERT_FALSE(lessThan("🌲.b", "🌲")); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/pipeline/parsed_aggregation_projection.h b/src/mongo/db/pipeline/parsed_aggregation_projection.h index 0542847e9ba..5ea116e7a58 100644 --- a/src/mongo/db/pipeline/parsed_aggregation_projection.h +++ b/src/mongo/db/pipeline/parsed_aggregation_projection.h @@ -98,38 +98,6 @@ private: // The original object. Used to generate more helpful error messages. const BSONObj& _rawObj; - // Custom comparator that orders fieldpath strings by path prefix first, then by field. - struct PathPrefixComparator { - static constexpr char dot = '.'; - - // Returns true if the lhs value should sort before the rhs, false otherwise. - bool operator()(const std::string& lhs, const std::string& rhs) const { - for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) { - auto &lchar = lhs[pos], &rchar = rhs[pos]; - if (lchar == rchar) { - continue; - } - - // Consider the path delimiter '.' as being less than all other characters, so that - // paths sort directly before any paths they prefix and directly after any paths - // which prefix them. - if (lchar == dot) { - return true; - } else if (rchar == dot) { - return false; - } - - // Otherwise, default to normal character comparison. - return lchar < rchar; - } - - // If we get here, then we have reached the end of lhs and/or rhs and all of their path - // segments up to this point match. If lhs is shorter than rhs, then lhs prefixes rhs - // and should sort before it. - return lhs.size() < rhs.size(); - } - }; - // Tracks which paths we've seen to ensure no two paths conflict with each other. std::set<std::string, PathPrefixComparator> _seenPaths; }; |