diff options
author | George Wangensteen <george.wangensteen@10gen.com> | 2019-06-19 15:24:38 -0400 |
---|---|---|
committer | George Wangensteen <george.wangensteen@10gen.com> | 2019-06-28 16:01:59 -0400 |
commit | b651ed3dc6ae61332bb3193afef1cfa41bf5df53 (patch) | |
tree | d2fd427998d5ec995aeaa4faf8d13e597e745aa5 | |
parent | abe43b7aaddd8254c51eb2c8bfce99b9e11f8fee (diff) | |
download | mongo-b651ed3dc6ae61332bb3193afef1cfa41bf5df53.tar.gz |
SERVER-41750 refactor renamed paths API.
-rw-r--r-- | src/mongo/db/pipeline/SConscript | 3 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source.cpp | 132 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source.h | 8 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_project_test.cpp | 24 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_test.cpp | 284 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline.cpp | 42 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline.h | 21 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_test.cpp | 221 | ||||
-rw-r--r-- | src/mongo/db/pipeline/semantic_analysis.cpp | 246 | ||||
-rw-r--r-- | src/mongo/db/pipeline/semantic_analysis.h | 102 | ||||
-rw-r--r-- | src/mongo/db/pipeline/semantic_analysis_test.cpp | 434 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_aggregation_planner.cpp | 11 |
12 files changed, 1000 insertions, 528 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index e46d6b5f96a..adca346de94 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -318,6 +318,7 @@ pipelineeEnv.Library( 'document_source_sort_by_count.cpp', 'document_source_tee_consumer.cpp', 'document_source_unwind.cpp', + 'semantic_analysis.cpp', 'pipeline.cpp', 'sequential_document_cache.cpp', 'stage_constraints.cpp', @@ -450,7 +451,6 @@ env.CppUnitTest( 'document_source_skip_test.cpp', 'document_source_sort_by_count_test.cpp', 'document_source_sort_test.cpp', - 'document_source_test.cpp', 'document_source_unwind_test.cpp', 'document_value_test.cpp', 'document_value_test_util_self_test.cpp', @@ -472,6 +472,7 @@ env.CppUnitTest( 'pipeline_test.cpp', 'process_interface_standalone_test.cpp', 'resume_token_test.cpp', + 'semantic_analysis_test.cpp', 'sequential_document_cache_test.cpp', 'tee_buffer_test.cpp', 'value_comparator_test.cpp', diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp index 24775843286..75911585dfe 100644 --- a/src/mongo/db/pipeline/document_source.cpp +++ b/src/mongo/db/pipeline/document_source.cpp @@ -39,6 +39,7 @@ #include "mongo/db/pipeline/document_source_sequential_document_cache.h" #include "mongo/db/pipeline/expression_context.h" #include "mongo/db/pipeline/field_path.h" +#include "mongo/db/pipeline/semantic_analysis.h" #include "mongo/db/pipeline/value.h" #include "mongo/util/string_map.h" @@ -96,41 +97,6 @@ intrusive_ptr<DocumentSource> DocumentSource::optimize() { namespace { /** - * Given a set of paths 'dependencies', determines which of those paths will be modified if all - * paths except those in 'preservedPaths' are modified. - * - * For example, extractModifiedDependencies({'a', 'b', 'c.d', 'e'}, {'a', 'b.c', c'}) returns - * {'b', 'e'}, since 'b' and 'e' are not preserved (only 'b.c' is preserved). - */ -std::set<std::string> extractModifiedDependencies(const std::set<std::string>& dependencies, - const std::set<std::string>& preservedPaths) { - std::set<std::string> modifiedDependencies; - - // The modified dependencies is *almost* the set difference 'dependencies' - 'preservedPaths', - // except that if p in 'preservedPaths' is a "path prefix" of d in 'dependencies', then 'd' - // should not be included in the modified dependencies. - for (auto&& dependency : dependencies) { - bool preserved = false; - auto firstField = FieldPath::extractFirstFieldFromDottedPath(dependency).toString(); - // If even a prefix is preserved, the path is preserved, so search for any prefixes of - // 'dependency' as well. 'preservedPaths' is an *ordered* set, so we only have to search the - // range ['firstField', 'dependency'] to find any prefixes of 'dependency'. - for (auto it = preservedPaths.lower_bound(firstField); - it != preservedPaths.upper_bound(dependency); - ++it) { - if (*it == dependency || expression::isPathPrefixOf(*it, dependency)) { - preserved = true; - break; - } - } - if (!preserved) { - modifiedDependencies.insert(dependency); - } - } - return modifiedDependencies; -} - -/** * Returns a pair of pointers to $match stages, either of which can be null. The first entry in the * pair is a $match stage that can be moved before this stage, the second is a $match stage that * must remain after this stage. @@ -158,59 +124,14 @@ splitMatchByModifiedFields(const boost::intrusive_ptr<DocumentSourceMatch>& matc for (auto&& rename : modifiedPathsRet.renames) { preservedPaths.insert(rename.first); } - modifiedPaths = extractModifiedDependencies(depsTracker.fields, preservedPaths); + modifiedPaths = + semantic_analysis::extractModifiedDependencies(depsTracker.fields, preservedPaths); } } return match->splitSourceBy(modifiedPaths, modifiedPathsRet.renames); } /** - * If 'pathOfInterest' or some path prefix of 'pathOfInterest' is renamed, returns the new name for - * 'pathOfInterest', otherwise returns boost::none. - * For example, if 'renamedPaths' is {"c.d", "c"}, and 'pathOfInterest' is "c.d.f", returns "c.f". - */ -boost::optional<std::string> findNewName(const StringMap<std::string>& renamedPaths, - std::string pathOfInterest) { - FieldPath fullPathOfInterest(pathOfInterest); - StringBuilder toLookup; - std::size_t pathIndex = 0; - while (pathIndex < fullPathOfInterest.getPathLength()) { - if (pathIndex != 0) { - toLookup << "."; - } - toLookup << fullPathOfInterest.getFieldName(pathIndex++); - - auto it = renamedPaths.find(toLookup.stringData()); - if (it != renamedPaths.end()) { - const auto& newPathOfPrefix = it->second; - // We found a rename! Note this might be a rename of the prefix of the path, so we have - // to add back on the suffix that was unchanged. - StringBuilder renamedPath; - renamedPath << newPathOfPrefix; - while (pathIndex < fullPathOfInterest.getPathLength()) { - renamedPath << "." << fullPathOfInterest.getFieldName(pathIndex++); - } - return {renamedPath.str()}; - } - } - return boost::none; -} - -StringMap<std::string> computeNewNamesAssumingAnyPathsNotRenamedAreUnmodified( - const StringMap<std::string>& renamedPaths, const std::set<std::string>& pathsOfInterest) { - StringMap<std::string> renameOut; - for (auto&& ofInterest : pathsOfInterest) { - if (auto newName = findNewName(renamedPaths, ofInterest)) { - renameOut[ofInterest] = *newName; - } else { - // This path was not renamed, assume it was unchanged and map it to itself. - renameOut[ofInterest] = ofInterest; - } - } - return renameOut; -} - -/** * Verifies whether or not a $group is able to swap with a succeeding $match stage. While ordinarily * $group can swap with a $match, it cannot if the following $match has an $exists predicate on _id, * and the $group has exactly one field as the $group key. This is because every document will have @@ -292,53 +213,6 @@ Pipeline::SourceContainer::iterator DocumentSource::optimizeAt( return doOptimizeAt(itr, container); } -boost::optional<StringMap<std::string>> DocumentSource::renamedPaths( - const std::set<std::string>& pathsOfInterest) const { - auto modifiedPathsRet = this->getModifiedPaths(); - switch (modifiedPathsRet.type) { - case DocumentSource::GetModPathsReturn::Type::kNotSupported: - case DocumentSource::GetModPathsReturn::Type::kAllPaths: - return boost::none; - case DocumentSource::GetModPathsReturn::Type::kFiniteSet: { - for (auto&& modified : modifiedPathsRet.paths) { - for (auto&& ofInterest : pathsOfInterest) { - // Any overlap of the path means the path of interest is not preserved. For - // example, if the path of interest is "a.b", then a modified path of "a", - // "a.b", or "a.b.c" would all signal that "a.b" is not preserved. - if (ofInterest == modified || - expression::isPathPrefixOf(ofInterest, modified) || - expression::isPathPrefixOf(modified, ofInterest)) { - // This stage modifies at least one of the fields which the caller is - // interested in, bail out. - return boost::none; - } - } - } - - // None of the paths of interest were modified, construct the result map, mapping - // the names after this stage to the names before this stage. - return computeNewNamesAssumingAnyPathsNotRenamedAreUnmodified(modifiedPathsRet.renames, - pathsOfInterest); - } - case DocumentSource::GetModPathsReturn::Type::kAllExcept: { - auto preservedPaths = modifiedPathsRet.paths; - for (auto&& rename : modifiedPathsRet.renames) { - // For the purposes of checking which paths are modified, consider renames to - // preserve the path. We'll circle back later to figure out the new name if - // appropriate. - preservedPaths.insert(rename.first); - } - auto modifiedPaths = extractModifiedDependencies(pathsOfInterest, preservedPaths); - if (modifiedPaths.empty()) { - return computeNewNamesAssumingAnyPathsNotRenamedAreUnmodified( - modifiedPathsRet.renames, pathsOfInterest); - } - return boost::none; - } - } - MONGO_UNREACHABLE; -} - void DocumentSource::serializeToArray(vector<Value>& array, boost::optional<ExplainOptions::Verbosity> explain) const { Value entry = serialize(explain); diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index d1b5f3c06ba..435219fdfda 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -438,14 +438,6 @@ public: } /** - * Given 'currentNames' which describes a set of paths which the caller is interested in, - * returns boost::none if any of those paths are modified by this stage, or a mapping from - * their old name to their new name if they are preserved but possibly renamed by this stage. - */ - boost::optional<StringMap<std::string>> renamedPaths( - const std::set<std::string>& currentNames) const; - - /** * Get the dependencies this operation needs to do its job. If overridden, subclasses must add * all paths needed to apply their transformation to 'deps->fields', and call * 'deps->setNeedsMetadata()' to indicate what metadata (e.g. text score), if any, is required. diff --git a/src/mongo/db/pipeline/document_source_project_test.cpp b/src/mongo/db/pipeline/document_source_project_test.cpp index ad0eecfe573..34b04a1ce2e 100644 --- a/src/mongo/db/pipeline/document_source_project_test.cpp +++ b/src/mongo/db/pipeline/document_source_project_test.cpp @@ -41,6 +41,7 @@ #include "mongo/db/pipeline/document_source_mock.h" #include "mongo/db/pipeline/document_source_project.h" #include "mongo/db/pipeline/document_value_test_util.h" +#include "mongo/db/pipeline/semantic_analysis.h" #include "mongo/db/pipeline/value.h" #include "mongo/unittest/unittest.h" @@ -266,6 +267,29 @@ TEST_F(ProjectStageTest, CanUseRemoveSystemVariableToConditionallyExcludeProject ASSERT(project->getNext().isEOF()); } +TEST_F(ProjectStageTest, ProjectionCorrectlyReportsRenamesForwards) { + auto project = DocumentSourceProject::create(fromjson("{'renamedB' : '$b'}"), getExpCtx()); + auto renames = + semantic_analysis::renamedPaths({"b"}, *project, semantic_analysis::Direction::kForward); + // renamedPaths should return a mapping of old name->new name for each path in interestingPaths + // if the paths are all unmodified (but possibly renamed). Because path b is preserved, but + // renamed (to renamedB) we expect a renamedPaths to return a mapping from b->renamedB. + auto single_rename = renames->extract("b"); + ASSERT_FALSE(single_rename.empty()); + ASSERT_EQUALS(single_rename.mapped(), "renamedB"); + ASSERT_TRUE(renames->empty()); +} + +TEST_F(ProjectStageTest, ProjectionCorrectlyReportsRenamesBackwards) { + auto project = DocumentSourceProject::create(fromjson("{'renamedB' : '$b'}"), getExpCtx()); + auto renames = semantic_analysis::renamedPaths( + {"renamedB"}, *project, semantic_analysis::Direction::kBackward); + auto single_rename = renames->extract("renamedB"); + ASSERT_FALSE(single_rename.empty()); + ASSERT_EQUALS(single_rename.mapped(), "b"); + ASSERT_TRUE(renames->empty()); +} + /** * Creates BSON for a DocumentSourceProject that represents projecting a new computed field nested * 'depth' levels deep. diff --git a/src/mongo/db/pipeline/document_source_test.cpp b/src/mongo/db/pipeline/document_source_test.cpp deleted file mode 100644 index e5bf55c4151..00000000000 --- a/src/mongo/db/pipeline/document_source_test.cpp +++ /dev/null @@ -1,284 +0,0 @@ -/** - * Copyright (C) 2018-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/platform/basic.h" - -#include "mongo/bson/bsonmisc.h" -#include "mongo/bson/bsonobj.h" -#include "mongo/bson/bsonobjbuilder.h" -#include "mongo/db/pipeline/document_source.h" -#include "mongo/db/pipeline/document_source_test_optimizations.h" -#include "mongo/db/service_context_test_fixture.h" -#include "mongo/unittest/unittest.h" - -namespace mongo { - -namespace { - -class RenamesAToB : public DocumentSourceTestOptimizations { -public: - RenamesAToB() : DocumentSourceTestOptimizations() {} - GetModPathsReturn getModifiedPaths() const final { - // Pretend this stage simply renames the "a" field to be "b", leaving the value of "a" the - // same. This would be the equivalent of an {$addFields: {b: "$a"}}. - return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}, {{"b", "a"}}}; - } -}; - -TEST(DocumentSourceRenamedPaths, DoesReturnSimpleRenameFromFiniteSetRename) { - RenamesAToB renamesAToB; - auto renames = renamesAToB.renamedPaths({"b"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 1UL); - ASSERT_EQ(map["b"], "a"); -} - -TEST(DocumentSourceRenamedPaths, ReturnsSimpleMapForUnaffectedFieldsFromFiniteSetRename) { - RenamesAToB renamesAToB; - { - auto renames = renamesAToB.renamedPaths({"c"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 1UL); - ASSERT_EQ(map["c"], "c"); - } - - { - auto renames = renamesAToB.renamedPaths({"a"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 1UL); - ASSERT_EQ(map["a"], "a"); - } - - { - auto renames = renamesAToB.renamedPaths({"e", "f", "g"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 3UL); - ASSERT_EQ(map["e"], "e"); - ASSERT_EQ(map["f"], "f"); - ASSERT_EQ(map["g"], "g"); - } -} - -class RenameCToDPreserveEFG : public DocumentSourceTestOptimizations { -public: - RenameCToDPreserveEFG() : DocumentSourceTestOptimizations() {} - - GetModPathsReturn getModifiedPaths() const final { - return {GetModPathsReturn::Type::kAllExcept, - std::set<std::string>{"e", "f", "g"}, - {{"d", "c"}}}; - } -}; - -TEST(DocumentSourceRenamedPaths, DoesReturnSimpleRenameFromAllExceptRename) { - RenameCToDPreserveEFG renameCToDPreserveEFG; - auto renames = renameCToDPreserveEFG.renamedPaths({"d"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 1UL); - ASSERT_EQ(map["d"], "c"); -} - -TEST(DocumentSourceRenamedPaths, ReturnsSimpleMapForUnaffectedFieldsFromAllExceptRename) { - RenameCToDPreserveEFG renameCToDPreserveEFG; - { - auto renames = renameCToDPreserveEFG.renamedPaths({"e"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 1UL); - ASSERT_EQ(map["e"], "e"); - } - - { - auto renames = renameCToDPreserveEFG.renamedPaths({"f", "g"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 2UL); - ASSERT_EQ(map["f"], "f"); - ASSERT_EQ(map["g"], "g"); - } -} - -class RenameCDotDToEPreserveFDotG : public DocumentSourceTestOptimizations { -public: - RenameCDotDToEPreserveFDotG() : DocumentSourceTestOptimizations() {} - - GetModPathsReturn getModifiedPaths() const final { - return {GetModPathsReturn::Type::kAllExcept, std::set<std::string>{"f.g"}, {{"e", "c.d"}}}; - } -}; - -TEST(DocumentSourceRenamedPaths, DoesReturnRenameToDottedFieldFromAllExceptRename) { - RenameCDotDToEPreserveFDotG renameCDotDToEPreserveFDotG; - { - auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"e"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 1UL); - ASSERT_EQ(map["e"], "c.d"); - } - { - auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"e.x", "e.y"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 2UL); - ASSERT_EQ(map["e.x"], "c.d.x"); - ASSERT_EQ(map["e.y"], "c.d.y"); - } -} - -TEST(DocumentSourceRenamedPaths, DoesNotTreatPrefixAsUnmodifiedWhenSuffixIsModifiedFromAllExcept) { - RenameCDotDToEPreserveFDotG renameCDotDToEPreserveFDotG; - { - auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"f"}); - ASSERT_FALSE(static_cast<bool>(renames)); - } - { - // This is the exception, the only path that is not modified. - auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"f.g"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 1UL); - ASSERT_EQ(map["f.g"], "f.g"); - } - { - // We know "f.g" is preserved, so it follows that a subpath of that path is also preserved. - auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"f.g.x", "f.g.xyz.foobarbaz"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 2UL); - ASSERT_EQ(map["f.g.x"], "f.g.x"); - ASSERT_EQ(map["f.g.xyz.foobarbaz"], "f.g.xyz.foobarbaz"); - } - - { - // This shares a prefix with the unmodified path, but should not be reported as unmodified. - auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"f.x"}); - ASSERT_FALSE(static_cast<bool>(renames)); - } -} - -class RenameAToXDotYModifyCDotD : public DocumentSourceTestOptimizations { -public: - RenameAToXDotYModifyCDotD() : DocumentSourceTestOptimizations() {} - - GetModPathsReturn getModifiedPaths() const final { - return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{"c.d"}, {{"x.y", "a"}}}; - } -}; - -TEST(DocumentSourceRenamedPaths, DoesReturnRenameToDottedFieldFromFiniteSetRename) { - RenameAToXDotYModifyCDotD renameAToXDotYModifyCDotD; - { - auto renames = renameAToXDotYModifyCDotD.renamedPaths({"x.y"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 1UL); - ASSERT_EQ(map["x.y"], "a"); - } - { - auto renames = renameAToXDotYModifyCDotD.renamedPaths({"x.y.z", "x.y.a.b.c"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 2UL); - ASSERT_EQ(map["x.y.z"], "a.z"); - ASSERT_EQ(map["x.y.a.b.c"], "a.a.b.c"); - } -} - -TEST(DocumentSourceRenamedPaths, DoesNotTreatPrefixAsUnmodifiedWhenSuffixIsPartOfModifiedSet) { - RenameAToXDotYModifyCDotD renameAToXDotYModifyCDotD; - { - auto renames = renameAToXDotYModifyCDotD.renamedPaths({"c"}); - ASSERT_FALSE(static_cast<bool>(renames)); - } - { - auto renames = renameAToXDotYModifyCDotD.renamedPaths({"c.d"}); - ASSERT_FALSE(static_cast<bool>(renames)); - } - { - auto renames = renameAToXDotYModifyCDotD.renamedPaths({"c.d.e"}); - ASSERT_FALSE(static_cast<bool>(renames)); - } - { - auto renames = renameAToXDotYModifyCDotD.renamedPaths({"c.not_d", "c.decoy"}); - ASSERT(static_cast<bool>(renames)); - auto map = *renames; - ASSERT_EQ(map.size(), 2UL); - ASSERT_EQ(map["c.not_d"], "c.not_d"); - ASSERT_EQ(map["c.decoy"], "c.decoy"); - } -} - -class ModifiesAllPaths : public DocumentSourceTestOptimizations { -public: - ModifiesAllPaths() : DocumentSourceTestOptimizations() {} - GetModPathsReturn getModifiedPaths() const final { - return {GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}, {}}; - } -}; - -TEST(DocumentSourceRenamedPaths, ReturnsNoneWhenAllPathsAreModified) { - ModifiesAllPaths modifiesAllPaths; - { - auto renames = modifiesAllPaths.renamedPaths({"a"}); - ASSERT_FALSE(static_cast<bool>(renames)); - } - { - auto renames = modifiesAllPaths.renamedPaths({"a", "b", "c.d"}); - ASSERT_FALSE(static_cast<bool>(renames)); - } -} - -class ModificationsUnknown : public DocumentSourceTestOptimizations { -public: - ModificationsUnknown() : DocumentSourceTestOptimizations() {} - GetModPathsReturn getModifiedPaths() const final { - return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}, {}}; - } -}; - -TEST(DocumentSourceRenamedPaths, ReturnsNoneWhenModificationsAreNotKnown) { - ModificationsUnknown modificationsUnknown; - { - auto renames = modificationsUnknown.renamedPaths({"a"}); - ASSERT_FALSE(static_cast<bool>(renames)); - } - { - auto renames = modificationsUnknown.renamedPaths({"a", "b", "c.d"}); - ASSERT_FALSE(static_cast<bool>(renames)); - } -} - -} // namespace -} // namespace mongo diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp index 23484931890..73b14b80262 100644 --- a/src/mongo/db/pipeline/pipeline.cpp +++ b/src/mongo/db/pipeline/pipeline.cpp @@ -500,48 +500,6 @@ void Pipeline::addFinalSource(intrusive_ptr<DocumentSource> source) { _sources.push_back(source); } -boost::optional<StringMap<std::string>> Pipeline::renamedPaths( - SourceContainer::const_reverse_iterator rstart, - SourceContainer::const_reverse_iterator rend, - std::set<std::string> pathsOfInterest) { - // Use a vector to give a path id to each input path. A path's id is its index in the vector. - const std::vector<string> inputPaths(pathsOfInterest.begin(), pathsOfInterest.end()); - std::vector<string> currentPaths(pathsOfInterest.begin(), pathsOfInterest.end()); - - // Loop backwards over the stages. We will re-use 'pathsOfInterest', modifying that set each - // time to be the current set of field's we're interested in. At the same time, we will maintain - // 'currentPaths'. 'pathsOfInterest' is used to compute the renames, while 'currentPaths' is - // used to tie a path back to its id. - // - // Interestingly, 'currentPaths' may contain duplicates. For example, if a stage like - // {$addFields: {a: "$b"}} duplicates the value of "a" and both paths are of interest, then - // 'currentPaths' may begin as ["a", "b"] representing the paths after the $addFields stage, but - // becomes ["a", "a"] via the rename. - for (auto it = rstart; it != rend; ++it) { - boost::optional<StringMap<string>> renamed = (*it)->renamedPaths(pathsOfInterest); - if (!renamed) { - return boost::none; - } - pathsOfInterest.clear(); - for (std::size_t pathId = 0; pathId < inputPaths.size(); ++pathId) { - currentPaths[pathId] = (*renamed)[currentPaths[pathId]]; - pathsOfInterest.insert(currentPaths[pathId]); - } - } - - // We got all the way through the pipeline via renames! Construct the mapping from path at the - // end of the pipeline to path at the beginning. - StringMap<string> renameMap; - for (std::size_t pathId = 0; pathId < currentPaths.size(); ++pathId) { - renameMap[inputPaths[pathId]] = currentPaths[pathId]; - } - return renameMap; -} - -boost::optional<StringMap<string>> Pipeline::renamedPaths(std::set<string> pathsOfInterest) const { - return renamedPaths(_sources.rbegin(), _sources.rend(), std::move(pathsOfInterest)); -} - DepsTracker Pipeline::getDependencies(DepsTracker::MetadataAvailable metadataAvailable) const { DepsTracker deps(metadataAvailable); const bool scopeHasVariables = pCtx->variablesParseState.hasDefinedVariables(); diff --git a/src/mongo/db/pipeline/pipeline.h b/src/mongo/db/pipeline/pipeline.h index 75249314225..fb4f3d988ee 100644 --- a/src/mongo/db/pipeline/pipeline.h +++ b/src/mongo/db/pipeline/pipeline.h @@ -133,19 +133,6 @@ public: */ static bool aggHasWriteStage(const BSONObj& cmd); - /** - * Given 'pathsOfInterest' which describes a set of paths which the caller is interested in, - * returns boost::none if any of those paths are modified by the section of a pipeline - * described by 'rstart' and 'rend', or a mapping from their name at the end of the pipeline to - * their name at the beginning of the pipeline if they are preserved but possibly renamed by - * this pipeline. Note that the analysis proceeds backwards, so the iterators must be reverse - * iterators. - */ - static boost::optional<StringMap<std::string>> renamedPaths( - SourceContainer::const_reverse_iterator rstart, - SourceContainer::const_reverse_iterator rend, - std::set<std::string> pathsOfInterest); - const boost::intrusive_ptr<ExpressionContext>& getContext() const { return pCtx; } @@ -265,14 +252,6 @@ public: */ DepsTracker getDependencies(DepsTracker::MetadataAvailable metadataAvailable) const; - /** - * Given 'pathsOfInterest' which describes a set of paths which the caller is interested in, - * returns boost::none if any of those paths are modified by this pipeline, or a mapping from - * their name at the end of the pipeline to their name at the beginning of the pipeline if they - * are preserved but possibly renamed by this pipeline. - */ - boost::optional<StringMap<std::string>> renamedPaths( - std::set<std::string> pathsOfInterest) const; const SourceContainer& getSources() const { return _sources; diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index f547d963d69..3d59eff1dfa 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -54,6 +54,7 @@ #include "mongo/db/pipeline/expression_context_for_test.h" #include "mongo/db/pipeline/field_path.h" #include "mongo/db/pipeline/pipeline.h" +#include "mongo/db/pipeline/semantic_analysis.h" #include "mongo/db/pipeline/stub_mongo_process_interface.h" #include "mongo/db/query/collation/collator_interface_mock.h" #include "mongo/db/query/query_test_service_context.h" @@ -2986,13 +2987,28 @@ TEST(PipelineRenameTracking, ReportsIdentityMapWhenEmpty) { boost::intrusive_ptr<ExpressionContext> expCtx(new ExpressionContextForTest()); auto pipeline = unittest::assertGet(Pipeline::create({DocumentSourceMock::createForTest()}, expCtx)); - auto renames = pipeline->renamedPaths({"a", "b", "c.d"}); - ASSERT(static_cast<bool>(renames)); - auto nameMap = *renames; - ASSERT_EQ(nameMap.size(), 3UL); - ASSERT_EQ(nameMap["a"], "a"); - ASSERT_EQ(nameMap["b"], "b"); - ASSERT_EQ(nameMap["c.d"], "c.d"); + { + // Tracking renames backwards. + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"a", "b", "c.d"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 3UL); + ASSERT_EQ(nameMap["a"], "a"); + ASSERT_EQ(nameMap["b"], "b"); + ASSERT_EQ(nameMap["c.d"], "c.d"); + } + { + // Tracking renames forwards. + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a", "b", "c.d"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 3UL); + ASSERT_EQ(nameMap["a"], "a"); + ASSERT_EQ(nameMap["b"], "b"); + ASSERT_EQ(nameMap["c.d"], "c.d"); + } } class NoModifications : public DocumentSourceTestOptimizations { @@ -3013,9 +3029,24 @@ public: TEST(PipelineRenameTracking, ReportsIdentityWhenNoStageModifiesAnything) { boost::intrusive_ptr<ExpressionContext> expCtx(new ExpressionContextForTest()); { + // Tracking renames backwards. + auto pipeline = unittest::assertGet(Pipeline::create( + {DocumentSourceMock::createForTest(), NoModifications::create()}, expCtx)); + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"a", "b", "c.d"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 3UL); + ASSERT_EQ(nameMap["a"], "a"); + ASSERT_EQ(nameMap["b"], "b"); + ASSERT_EQ(nameMap["c.d"], "c.d"); + } + { + // Tracking renames forwards. auto pipeline = unittest::assertGet(Pipeline::create( {DocumentSourceMock::createForTest(), NoModifications::create()}, expCtx)); - auto renames = pipeline->renamedPaths({"a", "b", "c.d"}); + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a", "b", "c.d"}); ASSERT(static_cast<bool>(renames)); auto nameMap = *renames; ASSERT_EQ(nameMap.size(), 3UL); @@ -3024,13 +3055,29 @@ TEST(PipelineRenameTracking, ReportsIdentityWhenNoStageModifiesAnything) { ASSERT_EQ(nameMap["c.d"], "c.d"); } { + // Tracking renames backwards. auto pipeline = unittest::assertGet(Pipeline::create({DocumentSourceMock::createForTest(), NoModifications::create(), NoModifications::create(), NoModifications::create()}, expCtx)); - auto renames = pipeline->renamedPaths({"a", "b", "c.d"}); - ASSERT(static_cast<bool>(renames)); + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"a", "b", "c.d"}); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 3UL); + ASSERT_EQ(nameMap["a"], "a"); + ASSERT_EQ(nameMap["b"], "b"); + ASSERT_EQ(nameMap["c.d"], "c.d"); + } + { + // Tracking renames forwards. + auto pipeline = unittest::assertGet(Pipeline::create({DocumentSourceMock::createForTest(), + NoModifications::create(), + NoModifications::create(), + NoModifications::create()}, + expCtx)); + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a", "b", "c.d"}); auto nameMap = *renames; ASSERT_EQ(nameMap.size(), 3UL); ASSERT_EQ(nameMap["a"], "a"); @@ -3061,9 +3108,20 @@ TEST(PipelineRenameTracking, DoesNotReportRenamesIfAStageDoesNotSupportTrackingT NotSupported::create(), NoModifications::create()}, expCtx)); - ASSERT_FALSE(static_cast<bool>(pipeline->renamedPaths({"a"}))); - ASSERT_FALSE(static_cast<bool>(pipeline->renamedPaths({"a", "b"}))); - ASSERT_FALSE(static_cast<bool>(pipeline->renamedPaths({"x", "yahoo", "c.d"}))); + // Backwards case. + ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths( + pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"a"}))); + ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths( + pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"a", "b"}))); + ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths( + pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"x", "yahoo", "c.d"}))); + // Forwards case. + ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths( + pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a"}))); + ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths( + pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a", "b"}))); + ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths( + pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"x", "yahoo", "c.d"}))); } class RenamesAToB : public DocumentSourceTestOptimizations { @@ -3082,14 +3140,27 @@ TEST(PipelineRenameTracking, ReportsNewNamesWhenSingleStageRenames) { auto pipeline = unittest::assertGet( Pipeline::create({DocumentSourceMock::createForTest(), RenamesAToB::create()}, expCtx)); { - auto renames = pipeline->renamedPaths({"b"}); + // Tracking backwards. + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"b"}); ASSERT(static_cast<bool>(renames)); auto nameMap = *renames; ASSERT_EQ(nameMap.size(), 1UL); ASSERT_EQ(nameMap["b"], "a"); } { - auto renames = pipeline->renamedPaths({"b", "c.d"}); + // Tracking forwards. + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 1UL); + ASSERT_EQ(nameMap["a"], "b"); + } + { + // Tracking backwards. + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"b", "c.d"}); ASSERT(static_cast<bool>(renames)); auto nameMap = *renames; ASSERT_EQ(nameMap.size(), 2UL); @@ -3097,15 +3168,37 @@ TEST(PipelineRenameTracking, ReportsNewNamesWhenSingleStageRenames) { ASSERT_EQ(nameMap["c.d"], "c.d"); } { + // Tracking forwards. + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a", "c.d"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 2UL); + ASSERT_EQ(nameMap["a"], "b"); + ASSERT_EQ(nameMap["c.d"], "c.d"); + } + + { // This is strange; the mock stage reports to essentially duplicate the "a" field into "b". // Because of this, both "b" and "a" should map to "a". - auto renames = pipeline->renamedPaths({"b", "a"}); + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"b", "a"}); ASSERT(static_cast<bool>(renames)); auto nameMap = *renames; ASSERT_EQ(nameMap.size(), 2UL); ASSERT_EQ(nameMap["b"], "a"); ASSERT_EQ(nameMap["a"], "a"); } + { + // Same strangeness as above, but in the forwards direction. + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"b", "a"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 2UL); + ASSERT_EQ(nameMap["a"], "b"); + ASSERT_EQ(nameMap["b"], "b"); + } } TEST(PipelineRenameTracking, ReportsIdentityMapWhenGivenEmptyIteratorRange) { @@ -3113,16 +3206,38 @@ TEST(PipelineRenameTracking, ReportsIdentityMapWhenGivenEmptyIteratorRange) { auto pipeline = unittest::assertGet( Pipeline::create({DocumentSourceMock::createForTest(), RenamesAToB::create()}, expCtx)); { - auto renames = Pipeline::renamedPaths( - pipeline->getSources().rbegin(), pipeline->getSources().rbegin(), {"b"}); + // Tracking backwards. + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().crbegin(), pipeline->getSources().crbegin(), {"b"}); ASSERT(static_cast<bool>(renames)); auto nameMap = *renames; ASSERT_EQ(nameMap.size(), 1UL); ASSERT_EQ(nameMap["b"], "b"); } { - auto renames = Pipeline::renamedPaths( - pipeline->getSources().rbegin(), pipeline->getSources().rbegin(), {"b", "c.d"}); + // Tracking forwards. + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().cbegin(), pipeline->getSources().cbegin(), {"b"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 1UL); + ASSERT_EQ(nameMap["b"], "b"); + } + + { + // Tracking backwards. + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().crbegin(), pipeline->getSources().crbegin(), {"b", "c.d"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 2UL); + ASSERT_EQ(nameMap["b"], "b"); + ASSERT_EQ(nameMap["c.d"], "c.d"); + } + { + // Tracking forwards. + auto renames = semantic_analysis::renamedPaths( + pipeline->getSources().cbegin(), pipeline->getSources().cbegin(), {"b", "c.d"}); ASSERT(static_cast<bool>(renames)); auto nameMap = *renames; ASSERT_EQ(nameMap.size(), 2UL); @@ -3144,14 +3259,30 @@ public: TEST(PipelineRenameTracking, ReportsNewNameAcrossMultipleRenames) { boost::intrusive_ptr<ExpressionContext> expCtx(new ExpressionContextForTest()); - auto pipeline = unittest::assertGet(Pipeline::create( - {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToC::create()}, - expCtx)); - auto renames = pipeline->renamedPaths({"c"}); - ASSERT(static_cast<bool>(renames)); - auto nameMap = *renames; - ASSERT_EQ(nameMap.size(), 1UL); - ASSERT_EQ(nameMap["c"], "a"); + { + // Tracking backwards. + auto pipeline = unittest::assertGet(Pipeline::create( + {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToC::create()}, + expCtx)); + auto stages = pipeline->getSources(); + auto renames = semantic_analysis::renamedPaths(stages.crbegin(), stages.crend(), {"c"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 1UL); + ASSERT_EQ(nameMap["c"], "a"); + } + { + // Tracking forwards. + auto pipeline = unittest::assertGet(Pipeline::create( + {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToC::create()}, + expCtx)); + auto stages = pipeline->getSources(); + auto renames = semantic_analysis::renamedPaths(stages.cbegin(), stages.cend(), {"a"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 1UL); + ASSERT_EQ(nameMap["a"], "c"); + } } class RenamesBToA : public DocumentSourceTestOptimizations { @@ -3161,20 +3292,36 @@ public: return new RenamesBToA(); } GetModPathsReturn getModifiedPaths() const final { - return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}, {{"b", "a"}}}; + return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}, {{"a", "b"}}}; } }; TEST(PipelineRenameTracking, CanHandleBackAndForthRename) { boost::intrusive_ptr<ExpressionContext> expCtx(new ExpressionContextForTest()); - auto pipeline = unittest::assertGet(Pipeline::create( - {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToA::create()}, - expCtx)); - auto renames = pipeline->renamedPaths({"a"}); - ASSERT(static_cast<bool>(renames)); - auto nameMap = *renames; - ASSERT_EQ(nameMap.size(), 1UL); - ASSERT_EQ(nameMap["a"], "a"); + { + // Tracking backwards. + auto pipeline = unittest::assertGet(Pipeline::create( + {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToA::create()}, + expCtx)); + auto stages = pipeline->getSources(); + auto renames = semantic_analysis::renamedPaths(stages.crbegin(), stages.crend(), {"a"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 1UL); + ASSERT_EQ(nameMap["a"], "a"); + } + { + // Tracking forwards. + auto pipeline = unittest::assertGet(Pipeline::create( + {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToA::create()}, + expCtx)); + auto stages = pipeline->getSources(); + auto renames = semantic_analysis::renamedPaths(stages.cbegin(), stages.cend(), {"a"}); + ASSERT(static_cast<bool>(renames)); + auto nameMap = *renames; + ASSERT_EQ(nameMap.size(), 1UL); + ASSERT_EQ(nameMap["a"], "a"); + } } TEST(InvolvedNamespacesTest, NoInvolvedNamespacesForMatchSortProject) { diff --git a/src/mongo/db/pipeline/semantic_analysis.cpp b/src/mongo/db/pipeline/semantic_analysis.cpp new file mode 100644 index 00000000000..256fbf9acbf --- /dev/null +++ b/src/mongo/db/pipeline/semantic_analysis.cpp @@ -0,0 +1,246 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/matcher/expression_algo.h" +#include "mongo/db/pipeline/pipeline.h" +#include "mongo/db/pipeline/semantic_analysis.h" + +namespace mongo::semantic_analysis { + +namespace { + +/** + * If 'pathOfInterest' or some path prefix of 'pathOfInterest' is renamed, returns the rename for + * 'pathOfInterest', otherwise returns boost::none. For example, if 'renamedPaths' is {"a", "b"}, + * and 'pathOfInterest' is "a.c", returns "b.c". Note that 'renamedPaths' must map names as they + * exist at one fixed point in an aggregation pipeline to names as they exist at another fixed point + * in the same pipeline (i.e. from path names as they exist before some particular aggregation + * stage, to names as they appear after that stage). + */ +boost::optional<std::string> findRename(const StringMap<std::string>& renamedPaths, + std::string pathOfInterest) { + FieldPath fullPathOfInterest(pathOfInterest); + boost::optional<StringBuilder> toLookup; + boost::optional<StringBuilder> renamedPath; + for (std::size_t pathIndex = 0; pathIndex < fullPathOfInterest.getPathLength(); ++pathIndex) { + if (!renamedPath) { + if (!toLookup) { + toLookup.emplace(); + } else { + (*toLookup) << "."; + } + (*toLookup) << fullPathOfInterest.getFieldName(pathIndex); + if (auto it = renamedPaths.find(toLookup->stringData()); it != renamedPaths.end()) { + renamedPath.emplace(); + (*renamedPath) << it->second; + } + // Found a rename - but it might be a rename of the prefix of the path, so we have to + // add back on the suffix that was unchanged. + } else { + (*renamedPath) << "." << fullPathOfInterest.getFieldName(pathIndex); + } + } + return renamedPath.map([](auto&& path) { return path.str(); }); +} + +/** + * Computes and returns a map that contains a mapping from each path in 'pathsOfInterest' to its + * rename, as determined by 'renamedPaths.' If a path was not renamed, assumes it is unmodified and + * maps the path to itself. + */ +StringMap<std::string> computeNamesAssumingAnyPathsNotRenamedAreUnmodified( + const StringMap<std::string>& renamedPaths, const std::set<std::string>& pathsOfInterest) { + StringMap<std::string> renameOut; + for (auto&& ofInterest : pathsOfInterest) { + if (auto name = findRename(renamedPaths, ofInterest)) { + renameOut[ofInterest] = *name; + } else { + // This path was not renamed, assume it was unchanged and map it to itself. + renameOut[ofInterest] = ofInterest; + } + } + return renameOut; +} + +StringMap<std::string> invertRenameMap(const StringMap<std::string>& originalMap) { + StringMap<std::string> reversedMap; + for (auto && [ newName, oldName ] : originalMap) { + reversedMap[oldName] = newName; + } + return reversedMap; +} + +/** + * Computes and returns a rename mapping for 'pathsOfInterest' over multiple aggregation pipeline + * stages. The range of pipeline stages we compute renames over is represented by the iterators + * 'start' and 'end'. If both 'start' and 'end' are reverse iterators, then 'start' should come + * after 'end' in the pipeline, 'traversalDir' should be "kBackward," 'pathsOfInterest' should be + * valid path names after stage 'start,' and this template will compute a mapping from the given + * names of 'pathsOfInterest' to their names as they were directly after stage 'end.'If both 'start' + * and 'end' are forwards iterators, then 'start' should come before 'end' in the pipeline, + * 'traversalDir' should be "kForward," 'pathsOfInterest' should be valid path names before stage + * 'start,' and this template will compute a mapping from the given names of 'pathsOfInterest' to + * their names as they are directly before stage 'end.' + * + * This should only be used internally; callers who need to track path renames through an + * aggregation pipeline should use one of the publically exposed options availible in the header. + */ +template <class Iterator> +boost::optional<StringMap<std::string>> multiStageRenamedPaths( + Iterator start, + Iterator end, + std::set<std::string> pathsOfInterest, + const Direction& traversalDir) { + // The keys to this map will always be the original names of 'pathsOfInterest'. The values will + // be updated as we loop through the pipeline's stages to always be the most up-to-date name we + // know of for that path. + StringMap<std::string> renameMap; + for (auto&& path : pathsOfInterest) { + renameMap[path] = path; + } + for (; start != end; ++start) { + auto renamed = renamedPaths(pathsOfInterest, **start, traversalDir); + if (!renamed) { + return boost::none; + } + //'pathsOfInterest' always holds the current names of the paths we're interested in, so it + // needs to be updated after each stage. + pathsOfInterest.clear(); + for (auto it = renameMap.cbegin(); it != renameMap.cend(); ++it) { + renameMap[it->first] = (*renamed)[it->second]; + pathsOfInterest.emplace(it->second); + } + } + return renameMap; +} + +} // namespace + +std::set<std::string> extractModifiedDependencies(const std::set<std::string>& dependencies, + const std::set<std::string>& preservedPaths) { + std::set<std::string> modifiedDependencies; + + // The modified dependencies is *almost* the set difference 'dependencies' - 'preservedPaths', + // except that if p in 'preservedPaths' is a "path prefix" of d in 'dependencies', then 'd' + // should not be included in the modified dependencies. + for (auto&& dependency : dependencies) { + bool preserved = false; + auto firstField = FieldPath::extractFirstFieldFromDottedPath(dependency).toString(); + // Because a path is preserved if the object it points to is preserved, if a prefix to a + // path is preserved, then the path itself must be preserved. So we search for any prefixes + // of 'dependency' as well. 'preservedPaths' is an *ordered* set, so we only have to search + // the range ['firstField', 'dependency'] to find any prefixes of 'dependency'. + for (auto it = preservedPaths.lower_bound(firstField); + it != preservedPaths.upper_bound(dependency); + ++it) { + if (*it == dependency || expression::isPathPrefixOf(*it, dependency)) { + preserved = true; + break; + } + } + if (!preserved) { + modifiedDependencies.insert(dependency); + } + } + return modifiedDependencies; +} + +boost::optional<StringMap<std::string>> renamedPaths(const std::set<std::string>& pathsOfInterest, + const DocumentSource& stage, + const Direction& traversalDir) { + auto modifiedPathsRet = stage.getModifiedPaths(); + switch (modifiedPathsRet.type) { + case DocumentSource::GetModPathsReturn::Type::kNotSupported: + case DocumentSource::GetModPathsReturn::Type::kAllPaths: + return boost::none; + case DocumentSource::GetModPathsReturn::Type::kFiniteSet: { + for (auto&& modified : modifiedPathsRet.paths) { + for (auto&& ofInterest : pathsOfInterest) { + // Any overlap of the path means the path of interest is not preserved. For + // example, if the path of interest is "a.b", then a modified path of "a", + // "a.b", or "a.b.c" would all signal that "a.b" is not preserved. + if (ofInterest == modified || + expression::isPathPrefixOf(ofInterest, modified) || + expression::isPathPrefixOf(modified, ofInterest)) { + // This stage modifies at least one of the fields which the caller is + // interested in, bail out. + return boost::none; + } + } + } + + // None of the paths of interest were modified, construct the result map, mapping + // the names after this stage to the names before this stage. + auto renameMap = (traversalDir == Direction::kForward) + ? invertRenameMap(modifiedPathsRet.renames) + : modifiedPathsRet.renames; + return computeNamesAssumingAnyPathsNotRenamedAreUnmodified(renameMap, pathsOfInterest); + } + case DocumentSource::GetModPathsReturn::Type::kAllExcept: { + auto preservedPaths = modifiedPathsRet.paths; + for (auto && [ newName, oldName ] : modifiedPathsRet.renames) { + // For the purposes of checking which paths are modified, consider renames to + // preserve the path. We'll circle back later to figure out the new name if + // appropriate. If we are going forward, we want to consider the name of the path + // before 'stage'; otherwise, we want to consider the name as it exists after + // 'stage'. + auto preservedPath = (traversalDir == Direction::kForward) ? oldName : newName; + preservedPaths.insert(preservedPath); + } + auto modifiedPaths = extractModifiedDependencies(pathsOfInterest, preservedPaths); + if (modifiedPaths.empty()) { + auto renameMap = (traversalDir == Direction::kForward) + ? invertRenameMap(modifiedPathsRet.renames) + : modifiedPathsRet.renames; + return computeNamesAssumingAnyPathsNotRenamedAreUnmodified(renameMap, + pathsOfInterest); + } + return boost::none; + } + } + MONGO_UNREACHABLE; +} + +boost::optional<StringMap<std::string>> renamedPaths( + const Pipeline::SourceContainer::const_iterator start, + const Pipeline::SourceContainer::const_iterator end, + const std::set<std::string>& pathsOfInterest) { + return multiStageRenamedPaths(start, end, pathsOfInterest, Direction::kForward); +} + +boost::optional<StringMap<std::string>> renamedPaths( + const Pipeline::SourceContainer::const_reverse_iterator start, + const Pipeline::SourceContainer::const_reverse_iterator end, + const std::set<std::string>& pathsOfInterest) { + return multiStageRenamedPaths(start, end, pathsOfInterest, Direction::kBackward); +} + +} // namespace mongo::semantic_analysis diff --git a/src/mongo/db/pipeline/semantic_analysis.h b/src/mongo/db/pipeline/semantic_analysis.h new file mode 100644 index 00000000000..73739919e10 --- /dev/null +++ b/src/mongo/db/pipeline/semantic_analysis.h @@ -0,0 +1,102 @@ +/** + * Copyright (C) 2019-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <boost/optional.hpp> +#include <set> +#include <string> + +#include "mongo/db/pipeline/document_source.h" +#include "mongo/db/pipeline/pipeline.h" + +namespace mongo::semantic_analysis { + +enum class Direction { kForward, kBackward }; + +/** + * Takes in a set of paths the caller is interested in, a pipeline stage, and a direction. If the + * direction is "forward", the paths given must exist before the stage is executed in the pipeline. + * If the direction is "backward," the paths must exist after the stage is executed in the pipeline. + * Returns boost::none if any of the pathsOfInterest are modified by the current stage. If all of + * the pathsOfInterest are preserved (but possibly renamed), we return a mapping from + * [pathOfInterest] --> [newName], where "newName" is the name of "pathOfInterest" on the "other + * side" of the stage with respect to the given direction. + * + * For example, say pathsOfInterest contains "a", a path name that exists before nextStage is run in + * the pipeline, and direction is forward. Say nextStage preserves all the paths but renames "a" to + * "b"; we would return a mapping a-->b. + * + * Conversely, say pathsOfInterest contains "b", a path name that exists after nextStage is run in + * the pipeline, and direction is backward. Say nextStage preserves all the paths but renamed "a" to + * "b"; we would return a mapping b-->a. + */ +boost::optional<StringMap<std::string>> renamedPaths(const std::set<std::string>& pathsOfInterest, + const DocumentSource& stage, + const Direction& traversalDir); +/** + * Tracks renames by walking a pipeline forwards. Takes two forward iterators that represent two + * stages in an aggregation pipeline, with 'start' coming before 'end,' as well as a set of path + * names the caller is interested in. Returns boost::none if any of these paths are modified by the + * pipeline from stages ['start' - 'end'); otherwise returns a mapping for each path in + * 'pathsOfInterest' from its name before stage 'start' to its name before stage 'end'. To track + * renames through an entire pipeline, 'start' should refer to the first stage in the pipeline while + * 'end' should be an iterator referring to the past-the-end stage. + */ +boost::optional<StringMap<std::string>> renamedPaths( + const Pipeline::SourceContainer::const_iterator start, + const Pipeline::SourceContainer::const_iterator end, + const std::set<std::string>& pathsOfInterest); + +/** + * Tracks renames by walking a pipeline backwards. Takes two reverse iterators that represent two + * stages in an aggregation pipeline, with 'start' coming after 'end,' as well as a set of path + * names the caller is interested in. Returns boost::none if any of these paths were modified by the + * pipeline from stages ('end' - 'start']; otherwise returns a mapping for each path in + * 'pathsOfInterest' from its name after stage 'start' to its name as it was after stage 'end'. To + * track renames through an entire pipeline, 'start' should refer to the last stage in the pipeline + * while 'end' should refer to the (hypothetical) stage preceeding the first stage in the pipeline + * (the 'reverse end'). + */ +boost::optional<StringMap<std::string>> renamedPaths( + const Pipeline::SourceContainer::const_reverse_iterator start, + const Pipeline::SourceContainer::const_reverse_iterator end, + const std::set<std::string>& pathsOfInterest); + +/** + * Given a set of paths 'dependencies', determines which of those paths will be modified if all + * paths except those in 'preservedPaths' are modified. + * + * For example, extractModifiedDependencies({'a', 'b', 'c.d', 'e'}, {'a', 'b.c', c'}) returns + * {'b', 'e'}, since 'b' and 'e' are not preserved (only 'b.c' is preserved). + */ +std::set<std::string> extractModifiedDependencies(const std::set<std::string>& dependencies, + const std::set<std::string>& preservedPaths); + +} // namespace mongo::semantic_analysis diff --git a/src/mongo/db/pipeline/semantic_analysis_test.cpp b/src/mongo/db/pipeline/semantic_analysis_test.cpp new file mode 100644 index 00000000000..8b43af7e9c3 --- /dev/null +++ b/src/mongo/db/pipeline/semantic_analysis_test.cpp @@ -0,0 +1,434 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/bson/bsonmisc.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/db/pipeline/document_source.h" +#include "mongo/db/pipeline/document_source_test_optimizations.h" +#include "mongo/db/pipeline/semantic_analysis.h" +#include "mongo/db/service_context_test_fixture.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { + +namespace { + +using namespace semantic_analysis; + +class RenamesAToB : public DocumentSourceTestOptimizations { +public: + RenamesAToB() : DocumentSourceTestOptimizations() {} + GetModPathsReturn getModifiedPaths() const final { + // Pretend this stage simply renames the "a" field to be "b", leaving the value of "a" the + // same. This would be the equivalent of an {$addFields: {b: "$a"}}. + return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}, {{"b", "a"}}}; + } +}; + +TEST(SemanticAnalysisRenamedPaths, DoesReturnSimpleRenameFromFiniteSetRename) { + RenamesAToB renamesAToB; + { + auto renames = renamedPaths({"a"}, renamesAToB, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["a"], "b"); + } + { + auto renames = renamedPaths({"b"}, renamesAToB, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["b"], "a"); + } +} + +TEST(SemanticAnalysisRenamedPaths, ReturnsSimpleMapForUnaffectedFieldsFromFiniteSetRename) { + RenamesAToB renamesAToB; + { + auto renames = renamedPaths({"c"}, renamesAToB, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["c"], "c"); + } + { + auto renames = renamedPaths({"c"}, renamesAToB, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["c"], "c"); + } + { + auto renames = renamedPaths({"a"}, renamesAToB, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["a"], "b"); + } + { + auto renames = renamedPaths({"b"}, renamesAToB, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["b"], "a"); + } + { + auto renames = renamedPaths({"e", "f", "g"}, renamesAToB, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 3UL); + ASSERT_EQ(map["e"], "e"); + ASSERT_EQ(map["f"], "f"); + ASSERT_EQ(map["g"], "g"); + } + { + auto renames = renamedPaths({"e", "f", "g"}, renamesAToB, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 3UL); + ASSERT_EQ(map["e"], "e"); + ASSERT_EQ(map["f"], "f"); + ASSERT_EQ(map["g"], "g"); + } +} + +class RenameCToDPreserveEFG : public DocumentSourceTestOptimizations { +public: + RenameCToDPreserveEFG() : DocumentSourceTestOptimizations() {} + + GetModPathsReturn getModifiedPaths() const final { + return {GetModPathsReturn::Type::kAllExcept, + std::set<std::string>{"e", "f", "g"}, + {{"d", "c"}}}; + } +}; + +TEST(SemanticAnalysisRenamedPaths, DoesReturnSimpleRenameFromAllExceptRename) { + RenameCToDPreserveEFG renameCToDPreserveEFG; + { + auto renames = renamedPaths({"c"}, renameCToDPreserveEFG, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["c"], "d"); + } + { + auto renames = renamedPaths({"d"}, renameCToDPreserveEFG, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["d"], "c"); + } +} + +TEST(SemanticAnalysisRenamedPaths, ReturnsSimpleMapForUnaffectedFieldsFromAllExceptRename) { + RenameCToDPreserveEFG renameCToDPreserveEFG; + { + auto renames = renamedPaths({"e"}, renameCToDPreserveEFG, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["e"], "e"); + } + { + auto renames = renamedPaths({"e"}, renameCToDPreserveEFG, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["e"], "e"); + } + { + auto renames = renamedPaths({"f", "g"}, renameCToDPreserveEFG, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 2UL); + ASSERT_EQ(map["f"], "f"); + ASSERT_EQ(map["g"], "g"); + } + { + auto renames = renamedPaths({"f", "g"}, renameCToDPreserveEFG, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 2UL); + ASSERT_EQ(map["f"], "f"); + ASSERT_EQ(map["g"], "g"); + } +} + +class RenameCDotDToEPreserveFDotG : public DocumentSourceTestOptimizations { +public: + RenameCDotDToEPreserveFDotG() : DocumentSourceTestOptimizations() {} + + GetModPathsReturn getModifiedPaths() const final { + return {GetModPathsReturn::Type::kAllExcept, std::set<std::string>{"f.g"}, {{"e", "c.d"}}}; + } +}; + +TEST(SemanticAnalysisRenamedPaths, DoesReturnRenameToDottedFieldFromAllExceptRename) { + RenameCDotDToEPreserveFDotG renameCDotDToEPreserveFDotG; + { + auto renames = renamedPaths({"c.d"}, renameCDotDToEPreserveFDotG, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["c.d"], "e"); + } + { + auto renames = renamedPaths({"e"}, renameCDotDToEPreserveFDotG, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["e"], "c.d"); + } + { + auto renames = + renamedPaths({"c.d.x", "c.d.y"}, renameCDotDToEPreserveFDotG, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 2UL); + ASSERT_EQ(map["c.d.x"], "e.x"); + ASSERT_EQ(map["c.d.y"], "e.y"); + } + { + auto renames = + renamedPaths({"e.x", "e.y"}, renameCDotDToEPreserveFDotG, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 2UL); + ASSERT_EQ(map["e.x"], "c.d.x"); + ASSERT_EQ(map["e.y"], "c.d.y"); + } +} + +TEST(SemanticAnalysisRenamedPaths, + DoesNotTreatPrefixAsUnmodifiedWhenSuffixIsModifiedFromAllExcept) { + RenameCDotDToEPreserveFDotG renameCDotDToEPreserveFDotG; + { + auto renames = renamedPaths({"f"}, renameCDotDToEPreserveFDotG, Direction::kForward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"f"}, renameCDotDToEPreserveFDotG, Direction::kBackward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + // This is the exception, the only path that is not modified. + auto renames = renamedPaths({"f.g"}, renameCDotDToEPreserveFDotG, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["f.g"], "f.g"); + } + { + // This is the exception, the only path that is not modified. + auto renames = renamedPaths({"f.g"}, renameCDotDToEPreserveFDotG, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["f.g"], "f.g"); + } + { + // We know "f.g" is preserved, so it follows that a subpath of that path is also preserved. + auto renames = renamedPaths( + {"f.g.x", "f.g.xyz.foobarbaz"}, renameCDotDToEPreserveFDotG, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 2UL); + ASSERT_EQ(map["f.g.x"], "f.g.x"); + ASSERT_EQ(map["f.g.xyz.foobarbaz"], "f.g.xyz.foobarbaz"); + } + { + auto renames = renamedPaths( + {"f.g.x", "f.g.xyz.foobarbaz"}, renameCDotDToEPreserveFDotG, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 2UL); + ASSERT_EQ(map["f.g.x"], "f.g.x"); + ASSERT_EQ(map["f.g.xyz.foobarbaz"], "f.g.xyz.foobarbaz"); + } + { + // This shares a prefix with the unmodified path, but should not be reported as unmodified. + auto renames = renamedPaths({"f.x"}, renameCDotDToEPreserveFDotG, Direction::kForward); + ASSERT_FALSE(static_cast<bool>(renames)); + } +} + +class RenameAToXDotYModifyCDotD : public DocumentSourceTestOptimizations { +public: + RenameAToXDotYModifyCDotD() : DocumentSourceTestOptimizations() {} + + GetModPathsReturn getModifiedPaths() const final { + return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{"c.d"}, {{"x.y", "a"}}}; + } +}; + +TEST(SemanticAnalysisRenamedPaths, DoesReturnRenameToDottedFieldFromFiniteSetRename) { + RenameAToXDotYModifyCDotD renameAToXDotYModifyCDotD; + { + auto renames = renamedPaths({"a"}, renameAToXDotYModifyCDotD, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["a"], "x.y"); + } + { + auto renames = renamedPaths({"x.y"}, renameAToXDotYModifyCDotD, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 1UL); + ASSERT_EQ(map["x.y"], "a"); + } + { + auto renames = + renamedPaths({"a.z", "a.a.b.c"}, renameAToXDotYModifyCDotD, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 2UL); + ASSERT_EQ(map["a.z"], "x.y.z"); + ASSERT_EQ(map["a.a.b.c"], "x.y.a.b.c"); + } + { + auto renames = + renamedPaths({"x.y.z", "x.y.a.b.c"}, renameAToXDotYModifyCDotD, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 2UL); + ASSERT_EQ(map["x.y.z"], "a.z"); + ASSERT_EQ(map["x.y.a.b.c"], "a.a.b.c"); + } +} + +TEST(SemanticAnalysisRenamedPaths, DoesNotTreatPrefixAsUnmodifiedWhenSuffixIsPartOfModifiedSet) { + RenameAToXDotYModifyCDotD renameAToXDotYModifyCDotD; + { + auto renames = renamedPaths({"c"}, renameAToXDotYModifyCDotD, Direction::kForward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"c.d"}, renameAToXDotYModifyCDotD, Direction::kForward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"c.d.e"}, renameAToXDotYModifyCDotD, Direction::kForward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"c"}, renameAToXDotYModifyCDotD, Direction::kBackward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"c.d"}, renameAToXDotYModifyCDotD, Direction::kBackward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"c.d.e"}, renameAToXDotYModifyCDotD, Direction::kBackward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = + renamedPaths({"c.not_d", "c.decoy"}, renameAToXDotYModifyCDotD, Direction::kForward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 2UL); + ASSERT_EQ(map["c.not_d"], "c.not_d"); + ASSERT_EQ(map["c.decoy"], "c.decoy"); + } + { + auto renames = + renamedPaths({"c.not_d", "c.decoy"}, renameAToXDotYModifyCDotD, Direction::kBackward); + ASSERT(static_cast<bool>(renames)); + auto map = *renames; + ASSERT_EQ(map.size(), 2UL); + ASSERT_EQ(map["c.not_d"], "c.not_d"); + ASSERT_EQ(map["c.decoy"], "c.decoy"); + } +} + +class ModifiesAllPaths : public DocumentSourceTestOptimizations { +public: + ModifiesAllPaths() : DocumentSourceTestOptimizations() {} + GetModPathsReturn getModifiedPaths() const final { + return {GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}, {}}; + } +}; + +TEST(SemanticAnalysisRenamedPaths, ReturnsNoneWhenAllPathsAreModified) { + ModifiesAllPaths modifiesAllPaths; + { + auto renames = renamedPaths({"a"}, modifiesAllPaths, Direction::kForward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"a", "b", "c.d"}, modifiesAllPaths, Direction::kForward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"a"}, modifiesAllPaths, Direction::kBackward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"a", "b", "c.d"}, modifiesAllPaths, Direction::kBackward); + ASSERT_FALSE(static_cast<bool>(renames)); + } +} + +class ModificationsUnknown : public DocumentSourceTestOptimizations { +public: + ModificationsUnknown() : DocumentSourceTestOptimizations() {} + GetModPathsReturn getModifiedPaths() const final { + return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}, {}}; + } +}; + +TEST(SemanticAnalysisRenamedPaths, ReturnsNoneWhenModificationsAreNotKnown) { + ModificationsUnknown modificationsUnknown; + { + auto renames = renamedPaths({"a"}, modificationsUnknown, Direction::kForward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"a", "b", "c.d"}, modificationsUnknown, Direction::kForward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"a"}, modificationsUnknown, Direction::kBackward); + ASSERT_FALSE(static_cast<bool>(renames)); + } + { + auto renames = renamedPaths({"a", "b", "c.d"}, modificationsUnknown, Direction::kBackward); + ASSERT_FALSE(static_cast<bool>(renames)); + } +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/s/query/cluster_aggregation_planner.cpp b/src/mongo/s/query/cluster_aggregation_planner.cpp index 165140819ed..c11be89a9be 100644 --- a/src/mongo/s/query/cluster_aggregation_planner.cpp +++ b/src/mongo/s/query/cluster_aggregation_planner.cpp @@ -41,6 +41,7 @@ #include "mongo/db/pipeline/document_source_skip.h" #include "mongo/db/pipeline/document_source_sort.h" #include "mongo/db/pipeline/document_source_unwind.h" +#include "mongo/db/pipeline/semantic_analysis.h" #include "mongo/executor/task_executor_pool.h" #include "mongo/s/catalog_cache.h" #include "mongo/s/cluster_commands_helpers.h" @@ -339,7 +340,7 @@ StringMap<std::string> computeShardKeyRenameMap(const Pipeline* mergePipeline, // global groups. Thus we want to exclude this stage from our rename tracking. traversalEnd = std::prev(traversalEnd); } - auto renameMap = Pipeline::renamedPaths(traversalStart, traversalEnd, pathsOfShardKey); + auto renameMap = semantic_analysis::renamedPaths(traversalStart, traversalEnd, pathsOfShardKey); invariant(renameMap, str::stream() << "Analyzed pipeline was thought to preserve the shard key fields, but did not: " @@ -359,7 +360,8 @@ bool anyStageModifiesShardKeyOrNeedsMerge(std::set<std::string> shardKeyPaths, const auto& stages = mergePipeline->getSources(); for (auto it = stages.crbegin(); it != stages.crend(); ++it) { const auto& stage = *it; - auto renames = stage->renamedPaths(std::move(shardKeyPaths)); + auto renames = semantic_analysis::renamedPaths( + std::move(shardKeyPaths), *stage, semantic_analysis::Direction::kBackward); if (!renames) { return true; } @@ -415,7 +417,7 @@ boost::optional<ShardedExchangePolicy> walkPipelineBackwardsTrackingShardKey( // chunk has an associated range [from, to); i.e. inclusive lower bound and exclusive upper // bound. The chunk ranges must cover all domain without any holes. For the exchange we coalesce // ranges into a single vector of points. E.g. chunks [min,5], [5,10], [10,max] will produce - // [min,5,10,max] vector. Number of points in the vector is always one grater than number of + // [min,5,10,max] vector. Number of points in the vector is always one greater than number of // chunks. // We also compute consumer indices for every chunk. From the example above (3 chunks) we may // get the vector [0,1,2]; i.e. the first chunk goes to the consumer 0 and so on. Note that @@ -553,9 +555,6 @@ boost::optional<ShardedExchangePolicy> checkIfEligibleForExchange(OperationConte return boost::none; } - const auto grid = Grid::get(opCtx); - invariant(grid); - if (mergePipeline->getSources().empty()) { return boost::none; } |