summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorGeorge Wangensteen <george.wangensteen@10gen.com>2019-06-19 15:24:38 -0400
committerGeorge Wangensteen <george.wangensteen@10gen.com>2019-06-28 16:01:59 -0400
commitb651ed3dc6ae61332bb3193afef1cfa41bf5df53 (patch)
treed2fd427998d5ec995aeaa4faf8d13e597e745aa5 /src/mongo
parentabe43b7aaddd8254c51eb2c8bfce99b9e11f8fee (diff)
downloadmongo-b651ed3dc6ae61332bb3193afef1cfa41bf5df53.tar.gz
SERVER-41750 refactor renamed paths API.
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/pipeline/SConscript3
-rw-r--r--src/mongo/db/pipeline/document_source.cpp132
-rw-r--r--src/mongo/db/pipeline/document_source.h8
-rw-r--r--src/mongo/db/pipeline/document_source_project_test.cpp24
-rw-r--r--src/mongo/db/pipeline/document_source_test.cpp284
-rw-r--r--src/mongo/db/pipeline/pipeline.cpp42
-rw-r--r--src/mongo/db/pipeline/pipeline.h21
-rw-r--r--src/mongo/db/pipeline/pipeline_test.cpp221
-rw-r--r--src/mongo/db/pipeline/semantic_analysis.cpp246
-rw-r--r--src/mongo/db/pipeline/semantic_analysis.h102
-rw-r--r--src/mongo/db/pipeline/semantic_analysis_test.cpp434
-rw-r--r--src/mongo/s/query/cluster_aggregation_planner.cpp11
12 files changed, 1000 insertions, 528 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index e46d6b5f96a..adca346de94 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -318,6 +318,7 @@ pipelineeEnv.Library(
'document_source_sort_by_count.cpp',
'document_source_tee_consumer.cpp',
'document_source_unwind.cpp',
+ 'semantic_analysis.cpp',
'pipeline.cpp',
'sequential_document_cache.cpp',
'stage_constraints.cpp',
@@ -450,7 +451,6 @@ env.CppUnitTest(
'document_source_skip_test.cpp',
'document_source_sort_by_count_test.cpp',
'document_source_sort_test.cpp',
- 'document_source_test.cpp',
'document_source_unwind_test.cpp',
'document_value_test.cpp',
'document_value_test_util_self_test.cpp',
@@ -472,6 +472,7 @@ env.CppUnitTest(
'pipeline_test.cpp',
'process_interface_standalone_test.cpp',
'resume_token_test.cpp',
+ 'semantic_analysis_test.cpp',
'sequential_document_cache_test.cpp',
'tee_buffer_test.cpp',
'value_comparator_test.cpp',
diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp
index 24775843286..75911585dfe 100644
--- a/src/mongo/db/pipeline/document_source.cpp
+++ b/src/mongo/db/pipeline/document_source.cpp
@@ -39,6 +39,7 @@
#include "mongo/db/pipeline/document_source_sequential_document_cache.h"
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/pipeline/field_path.h"
+#include "mongo/db/pipeline/semantic_analysis.h"
#include "mongo/db/pipeline/value.h"
#include "mongo/util/string_map.h"
@@ -96,41 +97,6 @@ intrusive_ptr<DocumentSource> DocumentSource::optimize() {
namespace {
/**
- * Given a set of paths 'dependencies', determines which of those paths will be modified if all
- * paths except those in 'preservedPaths' are modified.
- *
- * For example, extractModifiedDependencies({'a', 'b', 'c.d', 'e'}, {'a', 'b.c', c'}) returns
- * {'b', 'e'}, since 'b' and 'e' are not preserved (only 'b.c' is preserved).
- */
-std::set<std::string> extractModifiedDependencies(const std::set<std::string>& dependencies,
- const std::set<std::string>& preservedPaths) {
- std::set<std::string> modifiedDependencies;
-
- // The modified dependencies is *almost* the set difference 'dependencies' - 'preservedPaths',
- // except that if p in 'preservedPaths' is a "path prefix" of d in 'dependencies', then 'd'
- // should not be included in the modified dependencies.
- for (auto&& dependency : dependencies) {
- bool preserved = false;
- auto firstField = FieldPath::extractFirstFieldFromDottedPath(dependency).toString();
- // If even a prefix is preserved, the path is preserved, so search for any prefixes of
- // 'dependency' as well. 'preservedPaths' is an *ordered* set, so we only have to search the
- // range ['firstField', 'dependency'] to find any prefixes of 'dependency'.
- for (auto it = preservedPaths.lower_bound(firstField);
- it != preservedPaths.upper_bound(dependency);
- ++it) {
- if (*it == dependency || expression::isPathPrefixOf(*it, dependency)) {
- preserved = true;
- break;
- }
- }
- if (!preserved) {
- modifiedDependencies.insert(dependency);
- }
- }
- return modifiedDependencies;
-}
-
-/**
* Returns a pair of pointers to $match stages, either of which can be null. The first entry in the
* pair is a $match stage that can be moved before this stage, the second is a $match stage that
* must remain after this stage.
@@ -158,59 +124,14 @@ splitMatchByModifiedFields(const boost::intrusive_ptr<DocumentSourceMatch>& matc
for (auto&& rename : modifiedPathsRet.renames) {
preservedPaths.insert(rename.first);
}
- modifiedPaths = extractModifiedDependencies(depsTracker.fields, preservedPaths);
+ modifiedPaths =
+ semantic_analysis::extractModifiedDependencies(depsTracker.fields, preservedPaths);
}
}
return match->splitSourceBy(modifiedPaths, modifiedPathsRet.renames);
}
/**
- * If 'pathOfInterest' or some path prefix of 'pathOfInterest' is renamed, returns the new name for
- * 'pathOfInterest', otherwise returns boost::none.
- * For example, if 'renamedPaths' is {"c.d", "c"}, and 'pathOfInterest' is "c.d.f", returns "c.f".
- */
-boost::optional<std::string> findNewName(const StringMap<std::string>& renamedPaths,
- std::string pathOfInterest) {
- FieldPath fullPathOfInterest(pathOfInterest);
- StringBuilder toLookup;
- std::size_t pathIndex = 0;
- while (pathIndex < fullPathOfInterest.getPathLength()) {
- if (pathIndex != 0) {
- toLookup << ".";
- }
- toLookup << fullPathOfInterest.getFieldName(pathIndex++);
-
- auto it = renamedPaths.find(toLookup.stringData());
- if (it != renamedPaths.end()) {
- const auto& newPathOfPrefix = it->second;
- // We found a rename! Note this might be a rename of the prefix of the path, so we have
- // to add back on the suffix that was unchanged.
- StringBuilder renamedPath;
- renamedPath << newPathOfPrefix;
- while (pathIndex < fullPathOfInterest.getPathLength()) {
- renamedPath << "." << fullPathOfInterest.getFieldName(pathIndex++);
- }
- return {renamedPath.str()};
- }
- }
- return boost::none;
-}
-
-StringMap<std::string> computeNewNamesAssumingAnyPathsNotRenamedAreUnmodified(
- const StringMap<std::string>& renamedPaths, const std::set<std::string>& pathsOfInterest) {
- StringMap<std::string> renameOut;
- for (auto&& ofInterest : pathsOfInterest) {
- if (auto newName = findNewName(renamedPaths, ofInterest)) {
- renameOut[ofInterest] = *newName;
- } else {
- // This path was not renamed, assume it was unchanged and map it to itself.
- renameOut[ofInterest] = ofInterest;
- }
- }
- return renameOut;
-}
-
-/**
* Verifies whether or not a $group is able to swap with a succeeding $match stage. While ordinarily
* $group can swap with a $match, it cannot if the following $match has an $exists predicate on _id,
* and the $group has exactly one field as the $group key. This is because every document will have
@@ -292,53 +213,6 @@ Pipeline::SourceContainer::iterator DocumentSource::optimizeAt(
return doOptimizeAt(itr, container);
}
-boost::optional<StringMap<std::string>> DocumentSource::renamedPaths(
- const std::set<std::string>& pathsOfInterest) const {
- auto modifiedPathsRet = this->getModifiedPaths();
- switch (modifiedPathsRet.type) {
- case DocumentSource::GetModPathsReturn::Type::kNotSupported:
- case DocumentSource::GetModPathsReturn::Type::kAllPaths:
- return boost::none;
- case DocumentSource::GetModPathsReturn::Type::kFiniteSet: {
- for (auto&& modified : modifiedPathsRet.paths) {
- for (auto&& ofInterest : pathsOfInterest) {
- // Any overlap of the path means the path of interest is not preserved. For
- // example, if the path of interest is "a.b", then a modified path of "a",
- // "a.b", or "a.b.c" would all signal that "a.b" is not preserved.
- if (ofInterest == modified ||
- expression::isPathPrefixOf(ofInterest, modified) ||
- expression::isPathPrefixOf(modified, ofInterest)) {
- // This stage modifies at least one of the fields which the caller is
- // interested in, bail out.
- return boost::none;
- }
- }
- }
-
- // None of the paths of interest were modified, construct the result map, mapping
- // the names after this stage to the names before this stage.
- return computeNewNamesAssumingAnyPathsNotRenamedAreUnmodified(modifiedPathsRet.renames,
- pathsOfInterest);
- }
- case DocumentSource::GetModPathsReturn::Type::kAllExcept: {
- auto preservedPaths = modifiedPathsRet.paths;
- for (auto&& rename : modifiedPathsRet.renames) {
- // For the purposes of checking which paths are modified, consider renames to
- // preserve the path. We'll circle back later to figure out the new name if
- // appropriate.
- preservedPaths.insert(rename.first);
- }
- auto modifiedPaths = extractModifiedDependencies(pathsOfInterest, preservedPaths);
- if (modifiedPaths.empty()) {
- return computeNewNamesAssumingAnyPathsNotRenamedAreUnmodified(
- modifiedPathsRet.renames, pathsOfInterest);
- }
- return boost::none;
- }
- }
- MONGO_UNREACHABLE;
-}
-
void DocumentSource::serializeToArray(vector<Value>& array,
boost::optional<ExplainOptions::Verbosity> explain) const {
Value entry = serialize(explain);
diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h
index d1b5f3c06ba..435219fdfda 100644
--- a/src/mongo/db/pipeline/document_source.h
+++ b/src/mongo/db/pipeline/document_source.h
@@ -438,14 +438,6 @@ public:
}
/**
- * Given 'currentNames' which describes a set of paths which the caller is interested in,
- * returns boost::none if any of those paths are modified by this stage, or a mapping from
- * their old name to their new name if they are preserved but possibly renamed by this stage.
- */
- boost::optional<StringMap<std::string>> renamedPaths(
- const std::set<std::string>& currentNames) const;
-
- /**
* Get the dependencies this operation needs to do its job. If overridden, subclasses must add
* all paths needed to apply their transformation to 'deps->fields', and call
* 'deps->setNeedsMetadata()' to indicate what metadata (e.g. text score), if any, is required.
diff --git a/src/mongo/db/pipeline/document_source_project_test.cpp b/src/mongo/db/pipeline/document_source_project_test.cpp
index ad0eecfe573..34b04a1ce2e 100644
--- a/src/mongo/db/pipeline/document_source_project_test.cpp
+++ b/src/mongo/db/pipeline/document_source_project_test.cpp
@@ -41,6 +41,7 @@
#include "mongo/db/pipeline/document_source_mock.h"
#include "mongo/db/pipeline/document_source_project.h"
#include "mongo/db/pipeline/document_value_test_util.h"
+#include "mongo/db/pipeline/semantic_analysis.h"
#include "mongo/db/pipeline/value.h"
#include "mongo/unittest/unittest.h"
@@ -266,6 +267,29 @@ TEST_F(ProjectStageTest, CanUseRemoveSystemVariableToConditionallyExcludeProject
ASSERT(project->getNext().isEOF());
}
+TEST_F(ProjectStageTest, ProjectionCorrectlyReportsRenamesForwards) {
+ auto project = DocumentSourceProject::create(fromjson("{'renamedB' : '$b'}"), getExpCtx());
+ auto renames =
+ semantic_analysis::renamedPaths({"b"}, *project, semantic_analysis::Direction::kForward);
+ // renamedPaths should return a mapping of old name->new name for each path in interestingPaths
+ // if the paths are all unmodified (but possibly renamed). Because path b is preserved, but
+ // renamed (to renamedB) we expect a renamedPaths to return a mapping from b->renamedB.
+ auto single_rename = renames->extract("b");
+ ASSERT_FALSE(single_rename.empty());
+ ASSERT_EQUALS(single_rename.mapped(), "renamedB");
+ ASSERT_TRUE(renames->empty());
+}
+
+TEST_F(ProjectStageTest, ProjectionCorrectlyReportsRenamesBackwards) {
+ auto project = DocumentSourceProject::create(fromjson("{'renamedB' : '$b'}"), getExpCtx());
+ auto renames = semantic_analysis::renamedPaths(
+ {"renamedB"}, *project, semantic_analysis::Direction::kBackward);
+ auto single_rename = renames->extract("renamedB");
+ ASSERT_FALSE(single_rename.empty());
+ ASSERT_EQUALS(single_rename.mapped(), "b");
+ ASSERT_TRUE(renames->empty());
+}
+
/**
* Creates BSON for a DocumentSourceProject that represents projecting a new computed field nested
* 'depth' levels deep.
diff --git a/src/mongo/db/pipeline/document_source_test.cpp b/src/mongo/db/pipeline/document_source_test.cpp
deleted file mode 100644
index e5bf55c4151..00000000000
--- a/src/mongo/db/pipeline/document_source_test.cpp
+++ /dev/null
@@ -1,284 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/bson/bsonmisc.h"
-#include "mongo/bson/bsonobj.h"
-#include "mongo/bson/bsonobjbuilder.h"
-#include "mongo/db/pipeline/document_source.h"
-#include "mongo/db/pipeline/document_source_test_optimizations.h"
-#include "mongo/db/service_context_test_fixture.h"
-#include "mongo/unittest/unittest.h"
-
-namespace mongo {
-
-namespace {
-
-class RenamesAToB : public DocumentSourceTestOptimizations {
-public:
- RenamesAToB() : DocumentSourceTestOptimizations() {}
- GetModPathsReturn getModifiedPaths() const final {
- // Pretend this stage simply renames the "a" field to be "b", leaving the value of "a" the
- // same. This would be the equivalent of an {$addFields: {b: "$a"}}.
- return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}, {{"b", "a"}}};
- }
-};
-
-TEST(DocumentSourceRenamedPaths, DoesReturnSimpleRenameFromFiniteSetRename) {
- RenamesAToB renamesAToB;
- auto renames = renamesAToB.renamedPaths({"b"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 1UL);
- ASSERT_EQ(map["b"], "a");
-}
-
-TEST(DocumentSourceRenamedPaths, ReturnsSimpleMapForUnaffectedFieldsFromFiniteSetRename) {
- RenamesAToB renamesAToB;
- {
- auto renames = renamesAToB.renamedPaths({"c"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 1UL);
- ASSERT_EQ(map["c"], "c");
- }
-
- {
- auto renames = renamesAToB.renamedPaths({"a"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 1UL);
- ASSERT_EQ(map["a"], "a");
- }
-
- {
- auto renames = renamesAToB.renamedPaths({"e", "f", "g"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 3UL);
- ASSERT_EQ(map["e"], "e");
- ASSERT_EQ(map["f"], "f");
- ASSERT_EQ(map["g"], "g");
- }
-}
-
-class RenameCToDPreserveEFG : public DocumentSourceTestOptimizations {
-public:
- RenameCToDPreserveEFG() : DocumentSourceTestOptimizations() {}
-
- GetModPathsReturn getModifiedPaths() const final {
- return {GetModPathsReturn::Type::kAllExcept,
- std::set<std::string>{"e", "f", "g"},
- {{"d", "c"}}};
- }
-};
-
-TEST(DocumentSourceRenamedPaths, DoesReturnSimpleRenameFromAllExceptRename) {
- RenameCToDPreserveEFG renameCToDPreserveEFG;
- auto renames = renameCToDPreserveEFG.renamedPaths({"d"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 1UL);
- ASSERT_EQ(map["d"], "c");
-}
-
-TEST(DocumentSourceRenamedPaths, ReturnsSimpleMapForUnaffectedFieldsFromAllExceptRename) {
- RenameCToDPreserveEFG renameCToDPreserveEFG;
- {
- auto renames = renameCToDPreserveEFG.renamedPaths({"e"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 1UL);
- ASSERT_EQ(map["e"], "e");
- }
-
- {
- auto renames = renameCToDPreserveEFG.renamedPaths({"f", "g"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 2UL);
- ASSERT_EQ(map["f"], "f");
- ASSERT_EQ(map["g"], "g");
- }
-}
-
-class RenameCDotDToEPreserveFDotG : public DocumentSourceTestOptimizations {
-public:
- RenameCDotDToEPreserveFDotG() : DocumentSourceTestOptimizations() {}
-
- GetModPathsReturn getModifiedPaths() const final {
- return {GetModPathsReturn::Type::kAllExcept, std::set<std::string>{"f.g"}, {{"e", "c.d"}}};
- }
-};
-
-TEST(DocumentSourceRenamedPaths, DoesReturnRenameToDottedFieldFromAllExceptRename) {
- RenameCDotDToEPreserveFDotG renameCDotDToEPreserveFDotG;
- {
- auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"e"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 1UL);
- ASSERT_EQ(map["e"], "c.d");
- }
- {
- auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"e.x", "e.y"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 2UL);
- ASSERT_EQ(map["e.x"], "c.d.x");
- ASSERT_EQ(map["e.y"], "c.d.y");
- }
-}
-
-TEST(DocumentSourceRenamedPaths, DoesNotTreatPrefixAsUnmodifiedWhenSuffixIsModifiedFromAllExcept) {
- RenameCDotDToEPreserveFDotG renameCDotDToEPreserveFDotG;
- {
- auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"f"});
- ASSERT_FALSE(static_cast<bool>(renames));
- }
- {
- // This is the exception, the only path that is not modified.
- auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"f.g"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 1UL);
- ASSERT_EQ(map["f.g"], "f.g");
- }
- {
- // We know "f.g" is preserved, so it follows that a subpath of that path is also preserved.
- auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"f.g.x", "f.g.xyz.foobarbaz"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 2UL);
- ASSERT_EQ(map["f.g.x"], "f.g.x");
- ASSERT_EQ(map["f.g.xyz.foobarbaz"], "f.g.xyz.foobarbaz");
- }
-
- {
- // This shares a prefix with the unmodified path, but should not be reported as unmodified.
- auto renames = renameCDotDToEPreserveFDotG.renamedPaths({"f.x"});
- ASSERT_FALSE(static_cast<bool>(renames));
- }
-}
-
-class RenameAToXDotYModifyCDotD : public DocumentSourceTestOptimizations {
-public:
- RenameAToXDotYModifyCDotD() : DocumentSourceTestOptimizations() {}
-
- GetModPathsReturn getModifiedPaths() const final {
- return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{"c.d"}, {{"x.y", "a"}}};
- }
-};
-
-TEST(DocumentSourceRenamedPaths, DoesReturnRenameToDottedFieldFromFiniteSetRename) {
- RenameAToXDotYModifyCDotD renameAToXDotYModifyCDotD;
- {
- auto renames = renameAToXDotYModifyCDotD.renamedPaths({"x.y"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 1UL);
- ASSERT_EQ(map["x.y"], "a");
- }
- {
- auto renames = renameAToXDotYModifyCDotD.renamedPaths({"x.y.z", "x.y.a.b.c"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 2UL);
- ASSERT_EQ(map["x.y.z"], "a.z");
- ASSERT_EQ(map["x.y.a.b.c"], "a.a.b.c");
- }
-}
-
-TEST(DocumentSourceRenamedPaths, DoesNotTreatPrefixAsUnmodifiedWhenSuffixIsPartOfModifiedSet) {
- RenameAToXDotYModifyCDotD renameAToXDotYModifyCDotD;
- {
- auto renames = renameAToXDotYModifyCDotD.renamedPaths({"c"});
- ASSERT_FALSE(static_cast<bool>(renames));
- }
- {
- auto renames = renameAToXDotYModifyCDotD.renamedPaths({"c.d"});
- ASSERT_FALSE(static_cast<bool>(renames));
- }
- {
- auto renames = renameAToXDotYModifyCDotD.renamedPaths({"c.d.e"});
- ASSERT_FALSE(static_cast<bool>(renames));
- }
- {
- auto renames = renameAToXDotYModifyCDotD.renamedPaths({"c.not_d", "c.decoy"});
- ASSERT(static_cast<bool>(renames));
- auto map = *renames;
- ASSERT_EQ(map.size(), 2UL);
- ASSERT_EQ(map["c.not_d"], "c.not_d");
- ASSERT_EQ(map["c.decoy"], "c.decoy");
- }
-}
-
-class ModifiesAllPaths : public DocumentSourceTestOptimizations {
-public:
- ModifiesAllPaths() : DocumentSourceTestOptimizations() {}
- GetModPathsReturn getModifiedPaths() const final {
- return {GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}, {}};
- }
-};
-
-TEST(DocumentSourceRenamedPaths, ReturnsNoneWhenAllPathsAreModified) {
- ModifiesAllPaths modifiesAllPaths;
- {
- auto renames = modifiesAllPaths.renamedPaths({"a"});
- ASSERT_FALSE(static_cast<bool>(renames));
- }
- {
- auto renames = modifiesAllPaths.renamedPaths({"a", "b", "c.d"});
- ASSERT_FALSE(static_cast<bool>(renames));
- }
-}
-
-class ModificationsUnknown : public DocumentSourceTestOptimizations {
-public:
- ModificationsUnknown() : DocumentSourceTestOptimizations() {}
- GetModPathsReturn getModifiedPaths() const final {
- return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}, {}};
- }
-};
-
-TEST(DocumentSourceRenamedPaths, ReturnsNoneWhenModificationsAreNotKnown) {
- ModificationsUnknown modificationsUnknown;
- {
- auto renames = modificationsUnknown.renamedPaths({"a"});
- ASSERT_FALSE(static_cast<bool>(renames));
- }
- {
- auto renames = modificationsUnknown.renamedPaths({"a", "b", "c.d"});
- ASSERT_FALSE(static_cast<bool>(renames));
- }
-}
-
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp
index 23484931890..73b14b80262 100644
--- a/src/mongo/db/pipeline/pipeline.cpp
+++ b/src/mongo/db/pipeline/pipeline.cpp
@@ -500,48 +500,6 @@ void Pipeline::addFinalSource(intrusive_ptr<DocumentSource> source) {
_sources.push_back(source);
}
-boost::optional<StringMap<std::string>> Pipeline::renamedPaths(
- SourceContainer::const_reverse_iterator rstart,
- SourceContainer::const_reverse_iterator rend,
- std::set<std::string> pathsOfInterest) {
- // Use a vector to give a path id to each input path. A path's id is its index in the vector.
- const std::vector<string> inputPaths(pathsOfInterest.begin(), pathsOfInterest.end());
- std::vector<string> currentPaths(pathsOfInterest.begin(), pathsOfInterest.end());
-
- // Loop backwards over the stages. We will re-use 'pathsOfInterest', modifying that set each
- // time to be the current set of field's we're interested in. At the same time, we will maintain
- // 'currentPaths'. 'pathsOfInterest' is used to compute the renames, while 'currentPaths' is
- // used to tie a path back to its id.
- //
- // Interestingly, 'currentPaths' may contain duplicates. For example, if a stage like
- // {$addFields: {a: "$b"}} duplicates the value of "a" and both paths are of interest, then
- // 'currentPaths' may begin as ["a", "b"] representing the paths after the $addFields stage, but
- // becomes ["a", "a"] via the rename.
- for (auto it = rstart; it != rend; ++it) {
- boost::optional<StringMap<string>> renamed = (*it)->renamedPaths(pathsOfInterest);
- if (!renamed) {
- return boost::none;
- }
- pathsOfInterest.clear();
- for (std::size_t pathId = 0; pathId < inputPaths.size(); ++pathId) {
- currentPaths[pathId] = (*renamed)[currentPaths[pathId]];
- pathsOfInterest.insert(currentPaths[pathId]);
- }
- }
-
- // We got all the way through the pipeline via renames! Construct the mapping from path at the
- // end of the pipeline to path at the beginning.
- StringMap<string> renameMap;
- for (std::size_t pathId = 0; pathId < currentPaths.size(); ++pathId) {
- renameMap[inputPaths[pathId]] = currentPaths[pathId];
- }
- return renameMap;
-}
-
-boost::optional<StringMap<string>> Pipeline::renamedPaths(std::set<string> pathsOfInterest) const {
- return renamedPaths(_sources.rbegin(), _sources.rend(), std::move(pathsOfInterest));
-}
-
DepsTracker Pipeline::getDependencies(DepsTracker::MetadataAvailable metadataAvailable) const {
DepsTracker deps(metadataAvailable);
const bool scopeHasVariables = pCtx->variablesParseState.hasDefinedVariables();
diff --git a/src/mongo/db/pipeline/pipeline.h b/src/mongo/db/pipeline/pipeline.h
index 75249314225..fb4f3d988ee 100644
--- a/src/mongo/db/pipeline/pipeline.h
+++ b/src/mongo/db/pipeline/pipeline.h
@@ -133,19 +133,6 @@ public:
*/
static bool aggHasWriteStage(const BSONObj& cmd);
- /**
- * Given 'pathsOfInterest' which describes a set of paths which the caller is interested in,
- * returns boost::none if any of those paths are modified by the section of a pipeline
- * described by 'rstart' and 'rend', or a mapping from their name at the end of the pipeline to
- * their name at the beginning of the pipeline if they are preserved but possibly renamed by
- * this pipeline. Note that the analysis proceeds backwards, so the iterators must be reverse
- * iterators.
- */
- static boost::optional<StringMap<std::string>> renamedPaths(
- SourceContainer::const_reverse_iterator rstart,
- SourceContainer::const_reverse_iterator rend,
- std::set<std::string> pathsOfInterest);
-
const boost::intrusive_ptr<ExpressionContext>& getContext() const {
return pCtx;
}
@@ -265,14 +252,6 @@ public:
*/
DepsTracker getDependencies(DepsTracker::MetadataAvailable metadataAvailable) const;
- /**
- * Given 'pathsOfInterest' which describes a set of paths which the caller is interested in,
- * returns boost::none if any of those paths are modified by this pipeline, or a mapping from
- * their name at the end of the pipeline to their name at the beginning of the pipeline if they
- * are preserved but possibly renamed by this pipeline.
- */
- boost::optional<StringMap<std::string>> renamedPaths(
- std::set<std::string> pathsOfInterest) const;
const SourceContainer& getSources() const {
return _sources;
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index f547d963d69..3d59eff1dfa 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -54,6 +54,7 @@
#include "mongo/db/pipeline/expression_context_for_test.h"
#include "mongo/db/pipeline/field_path.h"
#include "mongo/db/pipeline/pipeline.h"
+#include "mongo/db/pipeline/semantic_analysis.h"
#include "mongo/db/pipeline/stub_mongo_process_interface.h"
#include "mongo/db/query/collation/collator_interface_mock.h"
#include "mongo/db/query/query_test_service_context.h"
@@ -2986,13 +2987,28 @@ TEST(PipelineRenameTracking, ReportsIdentityMapWhenEmpty) {
boost::intrusive_ptr<ExpressionContext> expCtx(new ExpressionContextForTest());
auto pipeline =
unittest::assertGet(Pipeline::create({DocumentSourceMock::createForTest()}, expCtx));
- auto renames = pipeline->renamedPaths({"a", "b", "c.d"});
- ASSERT(static_cast<bool>(renames));
- auto nameMap = *renames;
- ASSERT_EQ(nameMap.size(), 3UL);
- ASSERT_EQ(nameMap["a"], "a");
- ASSERT_EQ(nameMap["b"], "b");
- ASSERT_EQ(nameMap["c.d"], "c.d");
+ {
+ // Tracking renames backwards.
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"a", "b", "c.d"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 3UL);
+ ASSERT_EQ(nameMap["a"], "a");
+ ASSERT_EQ(nameMap["b"], "b");
+ ASSERT_EQ(nameMap["c.d"], "c.d");
+ }
+ {
+ // Tracking renames forwards.
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a", "b", "c.d"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 3UL);
+ ASSERT_EQ(nameMap["a"], "a");
+ ASSERT_EQ(nameMap["b"], "b");
+ ASSERT_EQ(nameMap["c.d"], "c.d");
+ }
}
class NoModifications : public DocumentSourceTestOptimizations {
@@ -3013,9 +3029,24 @@ public:
TEST(PipelineRenameTracking, ReportsIdentityWhenNoStageModifiesAnything) {
boost::intrusive_ptr<ExpressionContext> expCtx(new ExpressionContextForTest());
{
+ // Tracking renames backwards.
+ auto pipeline = unittest::assertGet(Pipeline::create(
+ {DocumentSourceMock::createForTest(), NoModifications::create()}, expCtx));
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"a", "b", "c.d"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 3UL);
+ ASSERT_EQ(nameMap["a"], "a");
+ ASSERT_EQ(nameMap["b"], "b");
+ ASSERT_EQ(nameMap["c.d"], "c.d");
+ }
+ {
+ // Tracking renames forwards.
auto pipeline = unittest::assertGet(Pipeline::create(
{DocumentSourceMock::createForTest(), NoModifications::create()}, expCtx));
- auto renames = pipeline->renamedPaths({"a", "b", "c.d"});
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a", "b", "c.d"});
ASSERT(static_cast<bool>(renames));
auto nameMap = *renames;
ASSERT_EQ(nameMap.size(), 3UL);
@@ -3024,13 +3055,29 @@ TEST(PipelineRenameTracking, ReportsIdentityWhenNoStageModifiesAnything) {
ASSERT_EQ(nameMap["c.d"], "c.d");
}
{
+ // Tracking renames backwards.
auto pipeline = unittest::assertGet(Pipeline::create({DocumentSourceMock::createForTest(),
NoModifications::create(),
NoModifications::create(),
NoModifications::create()},
expCtx));
- auto renames = pipeline->renamedPaths({"a", "b", "c.d"});
- ASSERT(static_cast<bool>(renames));
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"a", "b", "c.d"});
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 3UL);
+ ASSERT_EQ(nameMap["a"], "a");
+ ASSERT_EQ(nameMap["b"], "b");
+ ASSERT_EQ(nameMap["c.d"], "c.d");
+ }
+ {
+ // Tracking renames forwards.
+ auto pipeline = unittest::assertGet(Pipeline::create({DocumentSourceMock::createForTest(),
+ NoModifications::create(),
+ NoModifications::create(),
+ NoModifications::create()},
+ expCtx));
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a", "b", "c.d"});
auto nameMap = *renames;
ASSERT_EQ(nameMap.size(), 3UL);
ASSERT_EQ(nameMap["a"], "a");
@@ -3061,9 +3108,20 @@ TEST(PipelineRenameTracking, DoesNotReportRenamesIfAStageDoesNotSupportTrackingT
NotSupported::create(),
NoModifications::create()},
expCtx));
- ASSERT_FALSE(static_cast<bool>(pipeline->renamedPaths({"a"})));
- ASSERT_FALSE(static_cast<bool>(pipeline->renamedPaths({"a", "b"})));
- ASSERT_FALSE(static_cast<bool>(pipeline->renamedPaths({"x", "yahoo", "c.d"})));
+ // Backwards case.
+ ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths(
+ pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"a"})));
+ ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths(
+ pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"a", "b"})));
+ ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths(
+ pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"x", "yahoo", "c.d"})));
+ // Forwards case.
+ ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths(
+ pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a"})));
+ ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths(
+ pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a", "b"})));
+ ASSERT_FALSE(static_cast<bool>(semantic_analysis::renamedPaths(
+ pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"x", "yahoo", "c.d"})));
}
class RenamesAToB : public DocumentSourceTestOptimizations {
@@ -3082,14 +3140,27 @@ TEST(PipelineRenameTracking, ReportsNewNamesWhenSingleStageRenames) {
auto pipeline = unittest::assertGet(
Pipeline::create({DocumentSourceMock::createForTest(), RenamesAToB::create()}, expCtx));
{
- auto renames = pipeline->renamedPaths({"b"});
+ // Tracking backwards.
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"b"});
ASSERT(static_cast<bool>(renames));
auto nameMap = *renames;
ASSERT_EQ(nameMap.size(), 1UL);
ASSERT_EQ(nameMap["b"], "a");
}
{
- auto renames = pipeline->renamedPaths({"b", "c.d"});
+ // Tracking forwards.
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 1UL);
+ ASSERT_EQ(nameMap["a"], "b");
+ }
+ {
+ // Tracking backwards.
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"b", "c.d"});
ASSERT(static_cast<bool>(renames));
auto nameMap = *renames;
ASSERT_EQ(nameMap.size(), 2UL);
@@ -3097,15 +3168,37 @@ TEST(PipelineRenameTracking, ReportsNewNamesWhenSingleStageRenames) {
ASSERT_EQ(nameMap["c.d"], "c.d");
}
{
+ // Tracking forwards.
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"a", "c.d"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 2UL);
+ ASSERT_EQ(nameMap["a"], "b");
+ ASSERT_EQ(nameMap["c.d"], "c.d");
+ }
+
+ {
// This is strange; the mock stage reports to essentially duplicate the "a" field into "b".
// Because of this, both "b" and "a" should map to "a".
- auto renames = pipeline->renamedPaths({"b", "a"});
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().crbegin(), pipeline->getSources().crend(), {"b", "a"});
ASSERT(static_cast<bool>(renames));
auto nameMap = *renames;
ASSERT_EQ(nameMap.size(), 2UL);
ASSERT_EQ(nameMap["b"], "a");
ASSERT_EQ(nameMap["a"], "a");
}
+ {
+ // Same strangeness as above, but in the forwards direction.
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().cbegin(), pipeline->getSources().cend(), {"b", "a"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 2UL);
+ ASSERT_EQ(nameMap["a"], "b");
+ ASSERT_EQ(nameMap["b"], "b");
+ }
}
TEST(PipelineRenameTracking, ReportsIdentityMapWhenGivenEmptyIteratorRange) {
@@ -3113,16 +3206,38 @@ TEST(PipelineRenameTracking, ReportsIdentityMapWhenGivenEmptyIteratorRange) {
auto pipeline = unittest::assertGet(
Pipeline::create({DocumentSourceMock::createForTest(), RenamesAToB::create()}, expCtx));
{
- auto renames = Pipeline::renamedPaths(
- pipeline->getSources().rbegin(), pipeline->getSources().rbegin(), {"b"});
+ // Tracking backwards.
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().crbegin(), pipeline->getSources().crbegin(), {"b"});
ASSERT(static_cast<bool>(renames));
auto nameMap = *renames;
ASSERT_EQ(nameMap.size(), 1UL);
ASSERT_EQ(nameMap["b"], "b");
}
{
- auto renames = Pipeline::renamedPaths(
- pipeline->getSources().rbegin(), pipeline->getSources().rbegin(), {"b", "c.d"});
+ // Tracking forwards.
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().cbegin(), pipeline->getSources().cbegin(), {"b"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 1UL);
+ ASSERT_EQ(nameMap["b"], "b");
+ }
+
+ {
+ // Tracking backwards.
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().crbegin(), pipeline->getSources().crbegin(), {"b", "c.d"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 2UL);
+ ASSERT_EQ(nameMap["b"], "b");
+ ASSERT_EQ(nameMap["c.d"], "c.d");
+ }
+ {
+ // Tracking forwards.
+ auto renames = semantic_analysis::renamedPaths(
+ pipeline->getSources().cbegin(), pipeline->getSources().cbegin(), {"b", "c.d"});
ASSERT(static_cast<bool>(renames));
auto nameMap = *renames;
ASSERT_EQ(nameMap.size(), 2UL);
@@ -3144,14 +3259,30 @@ public:
TEST(PipelineRenameTracking, ReportsNewNameAcrossMultipleRenames) {
boost::intrusive_ptr<ExpressionContext> expCtx(new ExpressionContextForTest());
- auto pipeline = unittest::assertGet(Pipeline::create(
- {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToC::create()},
- expCtx));
- auto renames = pipeline->renamedPaths({"c"});
- ASSERT(static_cast<bool>(renames));
- auto nameMap = *renames;
- ASSERT_EQ(nameMap.size(), 1UL);
- ASSERT_EQ(nameMap["c"], "a");
+ {
+ // Tracking backwards.
+ auto pipeline = unittest::assertGet(Pipeline::create(
+ {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToC::create()},
+ expCtx));
+ auto stages = pipeline->getSources();
+ auto renames = semantic_analysis::renamedPaths(stages.crbegin(), stages.crend(), {"c"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 1UL);
+ ASSERT_EQ(nameMap["c"], "a");
+ }
+ {
+ // Tracking forwards.
+ auto pipeline = unittest::assertGet(Pipeline::create(
+ {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToC::create()},
+ expCtx));
+ auto stages = pipeline->getSources();
+ auto renames = semantic_analysis::renamedPaths(stages.cbegin(), stages.cend(), {"a"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 1UL);
+ ASSERT_EQ(nameMap["a"], "c");
+ }
}
class RenamesBToA : public DocumentSourceTestOptimizations {
@@ -3161,20 +3292,36 @@ public:
return new RenamesBToA();
}
GetModPathsReturn getModifiedPaths() const final {
- return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}, {{"b", "a"}}};
+ return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}, {{"a", "b"}}};
}
};
TEST(PipelineRenameTracking, CanHandleBackAndForthRename) {
boost::intrusive_ptr<ExpressionContext> expCtx(new ExpressionContextForTest());
- auto pipeline = unittest::assertGet(Pipeline::create(
- {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToA::create()},
- expCtx));
- auto renames = pipeline->renamedPaths({"a"});
- ASSERT(static_cast<bool>(renames));
- auto nameMap = *renames;
- ASSERT_EQ(nameMap.size(), 1UL);
- ASSERT_EQ(nameMap["a"], "a");
+ {
+ // Tracking backwards.
+ auto pipeline = unittest::assertGet(Pipeline::create(
+ {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToA::create()},
+ expCtx));
+ auto stages = pipeline->getSources();
+ auto renames = semantic_analysis::renamedPaths(stages.crbegin(), stages.crend(), {"a"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 1UL);
+ ASSERT_EQ(nameMap["a"], "a");
+ }
+ {
+ // Tracking forwards.
+ auto pipeline = unittest::assertGet(Pipeline::create(
+ {DocumentSourceMock::createForTest(), RenamesAToB::create(), RenamesBToA::create()},
+ expCtx));
+ auto stages = pipeline->getSources();
+ auto renames = semantic_analysis::renamedPaths(stages.cbegin(), stages.cend(), {"a"});
+ ASSERT(static_cast<bool>(renames));
+ auto nameMap = *renames;
+ ASSERT_EQ(nameMap.size(), 1UL);
+ ASSERT_EQ(nameMap["a"], "a");
+ }
}
TEST(InvolvedNamespacesTest, NoInvolvedNamespacesForMatchSortProject) {
diff --git a/src/mongo/db/pipeline/semantic_analysis.cpp b/src/mongo/db/pipeline/semantic_analysis.cpp
new file mode 100644
index 00000000000..256fbf9acbf
--- /dev/null
+++ b/src/mongo/db/pipeline/semantic_analysis.cpp
@@ -0,0 +1,246 @@
+/**
+ * Copyright (C) 2019-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/matcher/expression_algo.h"
+#include "mongo/db/pipeline/pipeline.h"
+#include "mongo/db/pipeline/semantic_analysis.h"
+
+namespace mongo::semantic_analysis {
+
+namespace {
+
+/**
+ * If 'pathOfInterest' or some path prefix of 'pathOfInterest' is renamed, returns the rename for
+ * 'pathOfInterest', otherwise returns boost::none. For example, if 'renamedPaths' is {"a", "b"},
+ * and 'pathOfInterest' is "a.c", returns "b.c". Note that 'renamedPaths' must map names as they
+ * exist at one fixed point in an aggregation pipeline to names as they exist at another fixed point
+ * in the same pipeline (i.e. from path names as they exist before some particular aggregation
+ * stage, to names as they appear after that stage).
+ */
+boost::optional<std::string> findRename(const StringMap<std::string>& renamedPaths,
+ std::string pathOfInterest) {
+ FieldPath fullPathOfInterest(pathOfInterest);
+ boost::optional<StringBuilder> toLookup;
+ boost::optional<StringBuilder> renamedPath;
+ for (std::size_t pathIndex = 0; pathIndex < fullPathOfInterest.getPathLength(); ++pathIndex) {
+ if (!renamedPath) {
+ if (!toLookup) {
+ toLookup.emplace();
+ } else {
+ (*toLookup) << ".";
+ }
+ (*toLookup) << fullPathOfInterest.getFieldName(pathIndex);
+ if (auto it = renamedPaths.find(toLookup->stringData()); it != renamedPaths.end()) {
+ renamedPath.emplace();
+ (*renamedPath) << it->second;
+ }
+ // Found a rename - but it might be a rename of the prefix of the path, so we have to
+ // add back on the suffix that was unchanged.
+ } else {
+ (*renamedPath) << "." << fullPathOfInterest.getFieldName(pathIndex);
+ }
+ }
+ return renamedPath.map([](auto&& path) { return path.str(); });
+}
+
+/**
+ * Computes and returns a map that contains a mapping from each path in 'pathsOfInterest' to its
+ * rename, as determined by 'renamedPaths.' If a path was not renamed, assumes it is unmodified and
+ * maps the path to itself.
+ */
+StringMap<std::string> computeNamesAssumingAnyPathsNotRenamedAreUnmodified(
+ const StringMap<std::string>& renamedPaths, const std::set<std::string>& pathsOfInterest) {
+ StringMap<std::string> renameOut;
+ for (auto&& ofInterest : pathsOfInterest) {
+ if (auto name = findRename(renamedPaths, ofInterest)) {
+ renameOut[ofInterest] = *name;
+ } else {
+ // This path was not renamed, assume it was unchanged and map it to itself.
+ renameOut[ofInterest] = ofInterest;
+ }
+ }
+ return renameOut;
+}
+
+StringMap<std::string> invertRenameMap(const StringMap<std::string>& originalMap) {
+ StringMap<std::string> reversedMap;
+ for (auto && [ newName, oldName ] : originalMap) {
+ reversedMap[oldName] = newName;
+ }
+ return reversedMap;
+}
+
+/**
+ * Computes and returns a rename mapping for 'pathsOfInterest' over multiple aggregation pipeline
+ * stages. The range of pipeline stages we compute renames over is represented by the iterators
+ * 'start' and 'end'. If both 'start' and 'end' are reverse iterators, then 'start' should come
+ * after 'end' in the pipeline, 'traversalDir' should be "kBackward," 'pathsOfInterest' should be
+ * valid path names after stage 'start,' and this template will compute a mapping from the given
+ * names of 'pathsOfInterest' to their names as they were directly after stage 'end.'If both 'start'
+ * and 'end' are forwards iterators, then 'start' should come before 'end' in the pipeline,
+ * 'traversalDir' should be "kForward," 'pathsOfInterest' should be valid path names before stage
+ * 'start,' and this template will compute a mapping from the given names of 'pathsOfInterest' to
+ * their names as they are directly before stage 'end.'
+ *
+ * This should only be used internally; callers who need to track path renames through an
+ * aggregation pipeline should use one of the publically exposed options availible in the header.
+ */
+template <class Iterator>
+boost::optional<StringMap<std::string>> multiStageRenamedPaths(
+ Iterator start,
+ Iterator end,
+ std::set<std::string> pathsOfInterest,
+ const Direction& traversalDir) {
+ // The keys to this map will always be the original names of 'pathsOfInterest'. The values will
+ // be updated as we loop through the pipeline's stages to always be the most up-to-date name we
+ // know of for that path.
+ StringMap<std::string> renameMap;
+ for (auto&& path : pathsOfInterest) {
+ renameMap[path] = path;
+ }
+ for (; start != end; ++start) {
+ auto renamed = renamedPaths(pathsOfInterest, **start, traversalDir);
+ if (!renamed) {
+ return boost::none;
+ }
+ //'pathsOfInterest' always holds the current names of the paths we're interested in, so it
+ // needs to be updated after each stage.
+ pathsOfInterest.clear();
+ for (auto it = renameMap.cbegin(); it != renameMap.cend(); ++it) {
+ renameMap[it->first] = (*renamed)[it->second];
+ pathsOfInterest.emplace(it->second);
+ }
+ }
+ return renameMap;
+}
+
+} // namespace
+
+std::set<std::string> extractModifiedDependencies(const std::set<std::string>& dependencies,
+ const std::set<std::string>& preservedPaths) {
+ std::set<std::string> modifiedDependencies;
+
+ // The modified dependencies is *almost* the set difference 'dependencies' - 'preservedPaths',
+ // except that if p in 'preservedPaths' is a "path prefix" of d in 'dependencies', then 'd'
+ // should not be included in the modified dependencies.
+ for (auto&& dependency : dependencies) {
+ bool preserved = false;
+ auto firstField = FieldPath::extractFirstFieldFromDottedPath(dependency).toString();
+ // Because a path is preserved if the object it points to is preserved, if a prefix to a
+ // path is preserved, then the path itself must be preserved. So we search for any prefixes
+ // of 'dependency' as well. 'preservedPaths' is an *ordered* set, so we only have to search
+ // the range ['firstField', 'dependency'] to find any prefixes of 'dependency'.
+ for (auto it = preservedPaths.lower_bound(firstField);
+ it != preservedPaths.upper_bound(dependency);
+ ++it) {
+ if (*it == dependency || expression::isPathPrefixOf(*it, dependency)) {
+ preserved = true;
+ break;
+ }
+ }
+ if (!preserved) {
+ modifiedDependencies.insert(dependency);
+ }
+ }
+ return modifiedDependencies;
+}
+
+boost::optional<StringMap<std::string>> renamedPaths(const std::set<std::string>& pathsOfInterest,
+ const DocumentSource& stage,
+ const Direction& traversalDir) {
+ auto modifiedPathsRet = stage.getModifiedPaths();
+ switch (modifiedPathsRet.type) {
+ case DocumentSource::GetModPathsReturn::Type::kNotSupported:
+ case DocumentSource::GetModPathsReturn::Type::kAllPaths:
+ return boost::none;
+ case DocumentSource::GetModPathsReturn::Type::kFiniteSet: {
+ for (auto&& modified : modifiedPathsRet.paths) {
+ for (auto&& ofInterest : pathsOfInterest) {
+ // Any overlap of the path means the path of interest is not preserved. For
+ // example, if the path of interest is "a.b", then a modified path of "a",
+ // "a.b", or "a.b.c" would all signal that "a.b" is not preserved.
+ if (ofInterest == modified ||
+ expression::isPathPrefixOf(ofInterest, modified) ||
+ expression::isPathPrefixOf(modified, ofInterest)) {
+ // This stage modifies at least one of the fields which the caller is
+ // interested in, bail out.
+ return boost::none;
+ }
+ }
+ }
+
+ // None of the paths of interest were modified, construct the result map, mapping
+ // the names after this stage to the names before this stage.
+ auto renameMap = (traversalDir == Direction::kForward)
+ ? invertRenameMap(modifiedPathsRet.renames)
+ : modifiedPathsRet.renames;
+ return computeNamesAssumingAnyPathsNotRenamedAreUnmodified(renameMap, pathsOfInterest);
+ }
+ case DocumentSource::GetModPathsReturn::Type::kAllExcept: {
+ auto preservedPaths = modifiedPathsRet.paths;
+ for (auto && [ newName, oldName ] : modifiedPathsRet.renames) {
+ // For the purposes of checking which paths are modified, consider renames to
+ // preserve the path. We'll circle back later to figure out the new name if
+ // appropriate. If we are going forward, we want to consider the name of the path
+ // before 'stage'; otherwise, we want to consider the name as it exists after
+ // 'stage'.
+ auto preservedPath = (traversalDir == Direction::kForward) ? oldName : newName;
+ preservedPaths.insert(preservedPath);
+ }
+ auto modifiedPaths = extractModifiedDependencies(pathsOfInterest, preservedPaths);
+ if (modifiedPaths.empty()) {
+ auto renameMap = (traversalDir == Direction::kForward)
+ ? invertRenameMap(modifiedPathsRet.renames)
+ : modifiedPathsRet.renames;
+ return computeNamesAssumingAnyPathsNotRenamedAreUnmodified(renameMap,
+ pathsOfInterest);
+ }
+ return boost::none;
+ }
+ }
+ MONGO_UNREACHABLE;
+}
+
+boost::optional<StringMap<std::string>> renamedPaths(
+ const Pipeline::SourceContainer::const_iterator start,
+ const Pipeline::SourceContainer::const_iterator end,
+ const std::set<std::string>& pathsOfInterest) {
+ return multiStageRenamedPaths(start, end, pathsOfInterest, Direction::kForward);
+}
+
+boost::optional<StringMap<std::string>> renamedPaths(
+ const Pipeline::SourceContainer::const_reverse_iterator start,
+ const Pipeline::SourceContainer::const_reverse_iterator end,
+ const std::set<std::string>& pathsOfInterest) {
+ return multiStageRenamedPaths(start, end, pathsOfInterest, Direction::kBackward);
+}
+
+} // namespace mongo::semantic_analysis
diff --git a/src/mongo/db/pipeline/semantic_analysis.h b/src/mongo/db/pipeline/semantic_analysis.h
new file mode 100644
index 00000000000..73739919e10
--- /dev/null
+++ b/src/mongo/db/pipeline/semantic_analysis.h
@@ -0,0 +1,102 @@
+/**
+ * Copyright (C) 2019-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <boost/optional.hpp>
+#include <set>
+#include <string>
+
+#include "mongo/db/pipeline/document_source.h"
+#include "mongo/db/pipeline/pipeline.h"
+
+namespace mongo::semantic_analysis {
+
+enum class Direction { kForward, kBackward };
+
+/**
+ * Takes in a set of paths the caller is interested in, a pipeline stage, and a direction. If the
+ * direction is "forward", the paths given must exist before the stage is executed in the pipeline.
+ * If the direction is "backward," the paths must exist after the stage is executed in the pipeline.
+ * Returns boost::none if any of the pathsOfInterest are modified by the current stage. If all of
+ * the pathsOfInterest are preserved (but possibly renamed), we return a mapping from
+ * [pathOfInterest] --> [newName], where "newName" is the name of "pathOfInterest" on the "other
+ * side" of the stage with respect to the given direction.
+ *
+ * For example, say pathsOfInterest contains "a", a path name that exists before nextStage is run in
+ * the pipeline, and direction is forward. Say nextStage preserves all the paths but renames "a" to
+ * "b"; we would return a mapping a-->b.
+ *
+ * Conversely, say pathsOfInterest contains "b", a path name that exists after nextStage is run in
+ * the pipeline, and direction is backward. Say nextStage preserves all the paths but renamed "a" to
+ * "b"; we would return a mapping b-->a.
+ */
+boost::optional<StringMap<std::string>> renamedPaths(const std::set<std::string>& pathsOfInterest,
+ const DocumentSource& stage,
+ const Direction& traversalDir);
+/**
+ * Tracks renames by walking a pipeline forwards. Takes two forward iterators that represent two
+ * stages in an aggregation pipeline, with 'start' coming before 'end,' as well as a set of path
+ * names the caller is interested in. Returns boost::none if any of these paths are modified by the
+ * pipeline from stages ['start' - 'end'); otherwise returns a mapping for each path in
+ * 'pathsOfInterest' from its name before stage 'start' to its name before stage 'end'. To track
+ * renames through an entire pipeline, 'start' should refer to the first stage in the pipeline while
+ * 'end' should be an iterator referring to the past-the-end stage.
+ */
+boost::optional<StringMap<std::string>> renamedPaths(
+ const Pipeline::SourceContainer::const_iterator start,
+ const Pipeline::SourceContainer::const_iterator end,
+ const std::set<std::string>& pathsOfInterest);
+
+/**
+ * Tracks renames by walking a pipeline backwards. Takes two reverse iterators that represent two
+ * stages in an aggregation pipeline, with 'start' coming after 'end,' as well as a set of path
+ * names the caller is interested in. Returns boost::none if any of these paths were modified by the
+ * pipeline from stages ('end' - 'start']; otherwise returns a mapping for each path in
+ * 'pathsOfInterest' from its name after stage 'start' to its name as it was after stage 'end'. To
+ * track renames through an entire pipeline, 'start' should refer to the last stage in the pipeline
+ * while 'end' should refer to the (hypothetical) stage preceeding the first stage in the pipeline
+ * (the 'reverse end').
+ */
+boost::optional<StringMap<std::string>> renamedPaths(
+ const Pipeline::SourceContainer::const_reverse_iterator start,
+ const Pipeline::SourceContainer::const_reverse_iterator end,
+ const std::set<std::string>& pathsOfInterest);
+
+/**
+ * Given a set of paths 'dependencies', determines which of those paths will be modified if all
+ * paths except those in 'preservedPaths' are modified.
+ *
+ * For example, extractModifiedDependencies({'a', 'b', 'c.d', 'e'}, {'a', 'b.c', c'}) returns
+ * {'b', 'e'}, since 'b' and 'e' are not preserved (only 'b.c' is preserved).
+ */
+std::set<std::string> extractModifiedDependencies(const std::set<std::string>& dependencies,
+ const std::set<std::string>& preservedPaths);
+
+} // namespace mongo::semantic_analysis
diff --git a/src/mongo/db/pipeline/semantic_analysis_test.cpp b/src/mongo/db/pipeline/semantic_analysis_test.cpp
new file mode 100644
index 00000000000..8b43af7e9c3
--- /dev/null
+++ b/src/mongo/db/pipeline/semantic_analysis_test.cpp
@@ -0,0 +1,434 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/bson/bsonmisc.h"
+#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/pipeline/document_source.h"
+#include "mongo/db/pipeline/document_source_test_optimizations.h"
+#include "mongo/db/pipeline/semantic_analysis.h"
+#include "mongo/db/service_context_test_fixture.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+
+namespace {
+
+using namespace semantic_analysis;
+
+class RenamesAToB : public DocumentSourceTestOptimizations {
+public:
+ RenamesAToB() : DocumentSourceTestOptimizations() {}
+ GetModPathsReturn getModifiedPaths() const final {
+ // Pretend this stage simply renames the "a" field to be "b", leaving the value of "a" the
+ // same. This would be the equivalent of an {$addFields: {b: "$a"}}.
+ return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}, {{"b", "a"}}};
+ }
+};
+
+TEST(SemanticAnalysisRenamedPaths, DoesReturnSimpleRenameFromFiniteSetRename) {
+ RenamesAToB renamesAToB;
+ {
+ auto renames = renamedPaths({"a"}, renamesAToB, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["a"], "b");
+ }
+ {
+ auto renames = renamedPaths({"b"}, renamesAToB, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["b"], "a");
+ }
+}
+
+TEST(SemanticAnalysisRenamedPaths, ReturnsSimpleMapForUnaffectedFieldsFromFiniteSetRename) {
+ RenamesAToB renamesAToB;
+ {
+ auto renames = renamedPaths({"c"}, renamesAToB, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["c"], "c");
+ }
+ {
+ auto renames = renamedPaths({"c"}, renamesAToB, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["c"], "c");
+ }
+ {
+ auto renames = renamedPaths({"a"}, renamesAToB, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["a"], "b");
+ }
+ {
+ auto renames = renamedPaths({"b"}, renamesAToB, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["b"], "a");
+ }
+ {
+ auto renames = renamedPaths({"e", "f", "g"}, renamesAToB, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 3UL);
+ ASSERT_EQ(map["e"], "e");
+ ASSERT_EQ(map["f"], "f");
+ ASSERT_EQ(map["g"], "g");
+ }
+ {
+ auto renames = renamedPaths({"e", "f", "g"}, renamesAToB, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 3UL);
+ ASSERT_EQ(map["e"], "e");
+ ASSERT_EQ(map["f"], "f");
+ ASSERT_EQ(map["g"], "g");
+ }
+}
+
+class RenameCToDPreserveEFG : public DocumentSourceTestOptimizations {
+public:
+ RenameCToDPreserveEFG() : DocumentSourceTestOptimizations() {}
+
+ GetModPathsReturn getModifiedPaths() const final {
+ return {GetModPathsReturn::Type::kAllExcept,
+ std::set<std::string>{"e", "f", "g"},
+ {{"d", "c"}}};
+ }
+};
+
+TEST(SemanticAnalysisRenamedPaths, DoesReturnSimpleRenameFromAllExceptRename) {
+ RenameCToDPreserveEFG renameCToDPreserveEFG;
+ {
+ auto renames = renamedPaths({"c"}, renameCToDPreserveEFG, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["c"], "d");
+ }
+ {
+ auto renames = renamedPaths({"d"}, renameCToDPreserveEFG, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["d"], "c");
+ }
+}
+
+TEST(SemanticAnalysisRenamedPaths, ReturnsSimpleMapForUnaffectedFieldsFromAllExceptRename) {
+ RenameCToDPreserveEFG renameCToDPreserveEFG;
+ {
+ auto renames = renamedPaths({"e"}, renameCToDPreserveEFG, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["e"], "e");
+ }
+ {
+ auto renames = renamedPaths({"e"}, renameCToDPreserveEFG, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["e"], "e");
+ }
+ {
+ auto renames = renamedPaths({"f", "g"}, renameCToDPreserveEFG, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 2UL);
+ ASSERT_EQ(map["f"], "f");
+ ASSERT_EQ(map["g"], "g");
+ }
+ {
+ auto renames = renamedPaths({"f", "g"}, renameCToDPreserveEFG, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 2UL);
+ ASSERT_EQ(map["f"], "f");
+ ASSERT_EQ(map["g"], "g");
+ }
+}
+
+class RenameCDotDToEPreserveFDotG : public DocumentSourceTestOptimizations {
+public:
+ RenameCDotDToEPreserveFDotG() : DocumentSourceTestOptimizations() {}
+
+ GetModPathsReturn getModifiedPaths() const final {
+ return {GetModPathsReturn::Type::kAllExcept, std::set<std::string>{"f.g"}, {{"e", "c.d"}}};
+ }
+};
+
+TEST(SemanticAnalysisRenamedPaths, DoesReturnRenameToDottedFieldFromAllExceptRename) {
+ RenameCDotDToEPreserveFDotG renameCDotDToEPreserveFDotG;
+ {
+ auto renames = renamedPaths({"c.d"}, renameCDotDToEPreserveFDotG, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["c.d"], "e");
+ }
+ {
+ auto renames = renamedPaths({"e"}, renameCDotDToEPreserveFDotG, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["e"], "c.d");
+ }
+ {
+ auto renames =
+ renamedPaths({"c.d.x", "c.d.y"}, renameCDotDToEPreserveFDotG, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 2UL);
+ ASSERT_EQ(map["c.d.x"], "e.x");
+ ASSERT_EQ(map["c.d.y"], "e.y");
+ }
+ {
+ auto renames =
+ renamedPaths({"e.x", "e.y"}, renameCDotDToEPreserveFDotG, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 2UL);
+ ASSERT_EQ(map["e.x"], "c.d.x");
+ ASSERT_EQ(map["e.y"], "c.d.y");
+ }
+}
+
+TEST(SemanticAnalysisRenamedPaths,
+ DoesNotTreatPrefixAsUnmodifiedWhenSuffixIsModifiedFromAllExcept) {
+ RenameCDotDToEPreserveFDotG renameCDotDToEPreserveFDotG;
+ {
+ auto renames = renamedPaths({"f"}, renameCDotDToEPreserveFDotG, Direction::kForward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"f"}, renameCDotDToEPreserveFDotG, Direction::kBackward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ // This is the exception, the only path that is not modified.
+ auto renames = renamedPaths({"f.g"}, renameCDotDToEPreserveFDotG, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["f.g"], "f.g");
+ }
+ {
+ // This is the exception, the only path that is not modified.
+ auto renames = renamedPaths({"f.g"}, renameCDotDToEPreserveFDotG, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["f.g"], "f.g");
+ }
+ {
+ // We know "f.g" is preserved, so it follows that a subpath of that path is also preserved.
+ auto renames = renamedPaths(
+ {"f.g.x", "f.g.xyz.foobarbaz"}, renameCDotDToEPreserveFDotG, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 2UL);
+ ASSERT_EQ(map["f.g.x"], "f.g.x");
+ ASSERT_EQ(map["f.g.xyz.foobarbaz"], "f.g.xyz.foobarbaz");
+ }
+ {
+ auto renames = renamedPaths(
+ {"f.g.x", "f.g.xyz.foobarbaz"}, renameCDotDToEPreserveFDotG, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 2UL);
+ ASSERT_EQ(map["f.g.x"], "f.g.x");
+ ASSERT_EQ(map["f.g.xyz.foobarbaz"], "f.g.xyz.foobarbaz");
+ }
+ {
+ // This shares a prefix with the unmodified path, but should not be reported as unmodified.
+ auto renames = renamedPaths({"f.x"}, renameCDotDToEPreserveFDotG, Direction::kForward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+}
+
+class RenameAToXDotYModifyCDotD : public DocumentSourceTestOptimizations {
+public:
+ RenameAToXDotYModifyCDotD() : DocumentSourceTestOptimizations() {}
+
+ GetModPathsReturn getModifiedPaths() const final {
+ return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{"c.d"}, {{"x.y", "a"}}};
+ }
+};
+
+TEST(SemanticAnalysisRenamedPaths, DoesReturnRenameToDottedFieldFromFiniteSetRename) {
+ RenameAToXDotYModifyCDotD renameAToXDotYModifyCDotD;
+ {
+ auto renames = renamedPaths({"a"}, renameAToXDotYModifyCDotD, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["a"], "x.y");
+ }
+ {
+ auto renames = renamedPaths({"x.y"}, renameAToXDotYModifyCDotD, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 1UL);
+ ASSERT_EQ(map["x.y"], "a");
+ }
+ {
+ auto renames =
+ renamedPaths({"a.z", "a.a.b.c"}, renameAToXDotYModifyCDotD, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 2UL);
+ ASSERT_EQ(map["a.z"], "x.y.z");
+ ASSERT_EQ(map["a.a.b.c"], "x.y.a.b.c");
+ }
+ {
+ auto renames =
+ renamedPaths({"x.y.z", "x.y.a.b.c"}, renameAToXDotYModifyCDotD, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 2UL);
+ ASSERT_EQ(map["x.y.z"], "a.z");
+ ASSERT_EQ(map["x.y.a.b.c"], "a.a.b.c");
+ }
+}
+
+TEST(SemanticAnalysisRenamedPaths, DoesNotTreatPrefixAsUnmodifiedWhenSuffixIsPartOfModifiedSet) {
+ RenameAToXDotYModifyCDotD renameAToXDotYModifyCDotD;
+ {
+ auto renames = renamedPaths({"c"}, renameAToXDotYModifyCDotD, Direction::kForward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"c.d"}, renameAToXDotYModifyCDotD, Direction::kForward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"c.d.e"}, renameAToXDotYModifyCDotD, Direction::kForward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"c"}, renameAToXDotYModifyCDotD, Direction::kBackward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"c.d"}, renameAToXDotYModifyCDotD, Direction::kBackward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"c.d.e"}, renameAToXDotYModifyCDotD, Direction::kBackward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames =
+ renamedPaths({"c.not_d", "c.decoy"}, renameAToXDotYModifyCDotD, Direction::kForward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 2UL);
+ ASSERT_EQ(map["c.not_d"], "c.not_d");
+ ASSERT_EQ(map["c.decoy"], "c.decoy");
+ }
+ {
+ auto renames =
+ renamedPaths({"c.not_d", "c.decoy"}, renameAToXDotYModifyCDotD, Direction::kBackward);
+ ASSERT(static_cast<bool>(renames));
+ auto map = *renames;
+ ASSERT_EQ(map.size(), 2UL);
+ ASSERT_EQ(map["c.not_d"], "c.not_d");
+ ASSERT_EQ(map["c.decoy"], "c.decoy");
+ }
+}
+
+class ModifiesAllPaths : public DocumentSourceTestOptimizations {
+public:
+ ModifiesAllPaths() : DocumentSourceTestOptimizations() {}
+ GetModPathsReturn getModifiedPaths() const final {
+ return {GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}, {}};
+ }
+};
+
+TEST(SemanticAnalysisRenamedPaths, ReturnsNoneWhenAllPathsAreModified) {
+ ModifiesAllPaths modifiesAllPaths;
+ {
+ auto renames = renamedPaths({"a"}, modifiesAllPaths, Direction::kForward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"a", "b", "c.d"}, modifiesAllPaths, Direction::kForward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"a"}, modifiesAllPaths, Direction::kBackward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"a", "b", "c.d"}, modifiesAllPaths, Direction::kBackward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+}
+
+class ModificationsUnknown : public DocumentSourceTestOptimizations {
+public:
+ ModificationsUnknown() : DocumentSourceTestOptimizations() {}
+ GetModPathsReturn getModifiedPaths() const final {
+ return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}, {}};
+ }
+};
+
+TEST(SemanticAnalysisRenamedPaths, ReturnsNoneWhenModificationsAreNotKnown) {
+ ModificationsUnknown modificationsUnknown;
+ {
+ auto renames = renamedPaths({"a"}, modificationsUnknown, Direction::kForward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"a", "b", "c.d"}, modificationsUnknown, Direction::kForward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"a"}, modificationsUnknown, Direction::kBackward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+ {
+ auto renames = renamedPaths({"a", "b", "c.d"}, modificationsUnknown, Direction::kBackward);
+ ASSERT_FALSE(static_cast<bool>(renames));
+ }
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/s/query/cluster_aggregation_planner.cpp b/src/mongo/s/query/cluster_aggregation_planner.cpp
index 165140819ed..c11be89a9be 100644
--- a/src/mongo/s/query/cluster_aggregation_planner.cpp
+++ b/src/mongo/s/query/cluster_aggregation_planner.cpp
@@ -41,6 +41,7 @@
#include "mongo/db/pipeline/document_source_skip.h"
#include "mongo/db/pipeline/document_source_sort.h"
#include "mongo/db/pipeline/document_source_unwind.h"
+#include "mongo/db/pipeline/semantic_analysis.h"
#include "mongo/executor/task_executor_pool.h"
#include "mongo/s/catalog_cache.h"
#include "mongo/s/cluster_commands_helpers.h"
@@ -339,7 +340,7 @@ StringMap<std::string> computeShardKeyRenameMap(const Pipeline* mergePipeline,
// global groups. Thus we want to exclude this stage from our rename tracking.
traversalEnd = std::prev(traversalEnd);
}
- auto renameMap = Pipeline::renamedPaths(traversalStart, traversalEnd, pathsOfShardKey);
+ auto renameMap = semantic_analysis::renamedPaths(traversalStart, traversalEnd, pathsOfShardKey);
invariant(renameMap,
str::stream()
<< "Analyzed pipeline was thought to preserve the shard key fields, but did not: "
@@ -359,7 +360,8 @@ bool anyStageModifiesShardKeyOrNeedsMerge(std::set<std::string> shardKeyPaths,
const auto& stages = mergePipeline->getSources();
for (auto it = stages.crbegin(); it != stages.crend(); ++it) {
const auto& stage = *it;
- auto renames = stage->renamedPaths(std::move(shardKeyPaths));
+ auto renames = semantic_analysis::renamedPaths(
+ std::move(shardKeyPaths), *stage, semantic_analysis::Direction::kBackward);
if (!renames) {
return true;
}
@@ -415,7 +417,7 @@ boost::optional<ShardedExchangePolicy> walkPipelineBackwardsTrackingShardKey(
// chunk has an associated range [from, to); i.e. inclusive lower bound and exclusive upper
// bound. The chunk ranges must cover all domain without any holes. For the exchange we coalesce
// ranges into a single vector of points. E.g. chunks [min,5], [5,10], [10,max] will produce
- // [min,5,10,max] vector. Number of points in the vector is always one grater than number of
+ // [min,5,10,max] vector. Number of points in the vector is always one greater than number of
// chunks.
// We also compute consumer indices for every chunk. From the example above (3 chunks) we may
// get the vector [0,1,2]; i.e. the first chunk goes to the consumer 0 and so on. Note that
@@ -553,9 +555,6 @@ boost::optional<ShardedExchangePolicy> checkIfEligibleForExchange(OperationConte
return boost::none;
}
- const auto grid = Grid::get(opCtx);
- invariant(grid);
-
if (mergePipeline->getSources().empty()) {
return boost::none;
}