SERVER-51886 $lookup + $merge pipeline may fail to resolve views correctly when collection names collide

(cherry picked from commit 045829020fb4a3972b316b1ad60e9ed2f285b092) (cherry picked from commit dcd06945691b7cdd520b4c28428d941ee685104d)
author: Mihai Andrei <mihai.andrei@10gen.com> 2020-12-01 12:10:53 -0500
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-01-04 19:11:12 +0000
commit: e490b04a5e2511e042660c1e1d60b0636f5a14bd (patch)
tree: de2eb0435c2ee82294140d776592dd62644a55dc
parent: 37e81074de8577873b515f7e7ba9ef1fbc40a93a (diff)
download: mongo-e490b04a5e2511e042660c1e1d60b0636f5a14bd.tar.gz
3 files changed, 105 insertions, 20 deletions
diff --git a/jstests/aggregation/view_resolution_namespace_collision.js b/jstests/aggregation/view_resolution_namespace_collision.js
new file mode 100644
index 00000000000..026874a1179
--- /dev/null
+++ b/jstests/aggregation/view_resolution_namespace_collision.js
@@ -0,0 +1,54 @@
+/**
+ * Tests that view resolution works correctly when a cross-database aggregation targets a
+ * collection and a view with the same name on different databases.
+ *
+ * @tags: [assumes_no_implicit_collection_creation_after_drop]
+ */
+(function() {
+"use strict";
+
+const dbName = "test";
+const testDB = db.getSiblingDB(dbName);
+const siblingDBName = "otherDB";
+const siblingDB = testDB.getSiblingDB(siblingDBName);
+
+// Since cross-db $merge only works against existing databases in a sharded environment, we create a
+// dummy collection on 'siblingDB' to allow this test to run in a sharded environment.
+assert.commandWorked(siblingDB.foo.insert({}));
+
+const sourceName = jsTestName();
+const otherCollName = jsTestName() + "_other_coll";
+const collidingName = "collision_collection";
+const sourceColl = testDB[sourceName];
+const otherColl = testDB[otherCollName];
+
+sourceColl.drop();
+otherColl.drop();
+testDB[collidingName].drop();
+
+assert.commandWorked(sourceColl.insert({_id: 0}));
+assert.commandWorked(otherColl.insert({_id: 0, notsecret: 1, secret: "really secret"}));
+
+// Create a view on 'testDB' that will have the same name as the collection that $merge will create.
+assert.commandWorked(testDB.runCommand(
+    {create: collidingName, viewOn: otherCollName, pipeline: [{$project: {secret: 0}}]}));
+
+// Verify that the view gets resolved correctly when performing a cross database aggregation where
+// the $lookup references a view on the source database and the $merge references a collection on
+// the target database with the same name as the view.
+const lookupStage = {
+    $lookup: {from: collidingName, localField: "_id", foreignField: "_id", as: "matches"}
+};
+const withoutWrite = sourceColl.aggregate([lookupStage]).toArray();
+const mergeStage = {
+    $merge: {into: {db: siblingDBName, coll: collidingName}}
+};
+
+// The aggregate should always use the view on 'testDB', not an empty collection on 'siblingDB'.
+sourceColl.aggregate([lookupStage, mergeStage]).toArray();
+const withWrite = siblingDB[collidingName].find().toArray();
+assert.eq(withoutWrite, withWrite);
+siblingDB[collidingName].drop();
+
+siblingDB.dropDatabase();
+})();
+\ No newline at end of file
diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp
index d24bac8cb9a..915c827e11e 100644
--- a/src/mongo/db/commands/run_aggregate.cpp
+++ b/src/mongo/db/commands/run_aggregate.cpp
@@ -291,29 +291,17 @@ StatusWith<StringMap<ExpressionContext::ResolvedNamespace>> resolveInvolvedNames
             continue;
         }
 
-        if (involvedNs.db() != request.getNamespaceString().db()) {
-            // If the involved namespace is not in the same database as the aggregation, it must be
-            // from a $merge to a collection in a different database. Since we cannot write to
-            // views, simply assume that the namespace is a collection.
-            resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}};
-        } else if (!db || db->getCollection(opCtx, involvedNs)) {
-            // If the aggregation database exists and 'involvedNs' refers to a collection namespace,
-            // then we resolve it as an empty pipeline in order to read directly from the underlying
-            // collection. If the database doesn't exist, then we still resolve it as an empty
-            // pipeline because 'involvedNs' doesn't refer to a view namespace in our consistent
-            // snapshot of the view catalog.
-            resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}};
-        } else if (viewCatalog->lookup(opCtx, involvedNs.ns())) {
-            // If 'involvedNs' refers to a view namespace, then we resolve its definition.
-            auto resolvedView = viewCatalog->resolveView(opCtx, involvedNs);
+        // If 'ns' refers to a view namespace, then we resolve its definition.
+        auto resolveViewDefinition = [&](const NamespaceString& ns) -> Status {
+            auto resolvedView = viewCatalog->resolveView(opCtx, ns);
             if (!resolvedView.isOK()) {
                 return {ErrorCodes::FailedToParse,
                         str::stream() << "Failed to resolve view '" << involvedNs.ns()
                                       << "': " << resolvedView.getStatus().toString()};
             }
 
-            resolvedNamespaces[involvedNs.coll()] = {resolvedView.getValue().getNamespace(),
-                                                     resolvedView.getValue().getPipeline()};
+            resolvedNamespaces[ns.coll()] = {resolvedView.getValue().getNamespace(),
+                                             resolvedView.getValue().getPipeline()};
 
             // We parse the pipeline corresponding to the resolved view in case we must resolve
             // other view namespaces that are also involved.
@@ -325,6 +313,39 @@ StatusWith<StringMap<ExpressionContext::ResolvedNamespace>> resolveInvolvedNames
             involvedNamespacesQueue.insert(involvedNamespacesQueue.end(),
                                            resolvedViewInvolvedNamespaces.begin(),
                                            resolvedViewInvolvedNamespaces.end());
+            return Status::OK();
+        };
+
+        // If the involved namespace is not in the same database as the aggregation, it must be
+        // from a $merge to a collection in a different database.
+        if (involvedNs.db() != request.getNamespaceString().db()) {
+            // SERVER-51886: It is not correct to assume that we are reading from a collection
+            // because the collection targeted by $out/$merge on a given database can have the same
+            // name as a view on the source database. As such, we determine whether the collection
+            // name references a view on the aggregation request's database. Note that the inverse
+            // scenario (mistaking a view for a collection) is not an issue because $merge/$out
+            // cannot target a view.
+            auto nssToCheck = NamespaceString(request.getNamespaceString().db(), involvedNs.coll());
+            if (viewCatalog && viewCatalog->lookup(opCtx, nssToCheck.ns())) {
+                auto status = resolveViewDefinition(nssToCheck);
+                if (!status.isOK()) {
+                    return status;
+                }
+            } else {
+                resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}};
+            }
+        } else if (!db || CollectionCatalog::get(opCtx).lookupCollectionByNamespace(involvedNs)) {
+            // If the aggregation database exists and 'involvedNs' refers to a collection namespace,
+            // then we resolve it as an empty pipeline in order to read directly from the underlying
+            // collection. If the database doesn't exist, then we still resolve it as an empty
+            // pipeline because 'involvedNs' doesn't refer to a view namespace in our consistent
+            // snapshot of the view catalog.
+            resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}};
+        } else if (viewCatalog->lookup(opCtx, involvedNs.ns())) {
+            auto status = resolveViewDefinition(involvedNs);
+            if (!status.isOK()) {
+                return status;
+            }
         } else {
             // 'involvedNs' is neither a view nor a collection, so resolve it as an empty pipeline
             // to treat it as reading from a non-existent collection.
diff --git a/src/mongo/s/query/cluster_aggregate.cpp b/src/mongo/s/query/cluster_aggregate.cpp
index e06d49d3850..bc4b9191c90 100644
--- a/src/mongo/s/query/cluster_aggregate.cpp
+++ b/src/mongo/s/query/cluster_aggregate.cpp
@@ -566,10 +566,19 @@ ShardId pickMergingShard(OperationContext* opCtx,
 // definition. It's okay that this is incorrect, we will repopulate the real namespace map on the
 // mongod. Note that this function must be called before forwarding an aggregation command on an
 // unsharded collection, in order to verify that the involved namespaces are allowed to be sharded.
-auto resolveInvolvedNamespaces(OperationContext* opCtx, const LiteParsedPipeline& litePipe) {
+auto resolveInvolvedNamespaces(OperationContext* opCtx,
+                               const LiteParsedPipeline& litePipe,
+                               const NamespaceString& requestNss) {
     StringMap<ExpressionContext::ResolvedNamespace> resolvedNamespaces;
     for (auto&& nss : litePipe.getInvolvedNamespaces()) {
-        resolvedNamespaces.try_emplace(nss.coll(), nss, std::vector<BSONObj>{});
+        // SERVER-51886: Only add namespaces that are in the same database as the original request.
+        // This is done to avoid namespace collisions for cross-database $merge operations that
+        // read from and write to collections that have the same name. Note that we don't have to
+        // verify whether the target collection is sharded as $merge can output to sharded
+        // collections. The output namespace will never need to be resolved as we cannot write to
+        // views.
+        if (requestNss.db() == nss.db())
+            resolvedNamespaces.try_emplace(nss.coll(), nss, std::vector<BSONObj>{});
     }
     return resolvedNamespaces;
 }
@@ -801,7 +810,8 @@ Status ClusterAggregate::runAggregate(OperationContext* opCtx,
     // If we don't have a routing table, then this is a $changeStream which must run on all shards.
     invariant(routingInfo || (mustRunOnAll && litePipe.hasChangeStream()));
 
-    auto resolvedNamespaces = resolveInvolvedNamespaces(opCtx, litePipe);
+    auto resolvedNamespaces =
+        resolveInvolvedNamespaces(opCtx, litePipe, request.getNamespaceString());
 
     // A pipeline is allowed to passthrough to the primary shard iff the following conditions are
     // met:
author	Mihai Andrei <mihai.andrei@10gen.com>	2020-12-01 12:10:53 -0500
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2021-01-04 19:11:12 +0000
commit	e490b04a5e2511e042660c1e1d60b0636f5a14bd (patch)
tree	de2eb0435c2ee82294140d776592dd62644a55dc
parent	37e81074de8577873b515f7e7ba9ef1fbc40a93a (diff)
download	mongo-e490b04a5e2511e042660c1e1d60b0636f5a14bd.tar.gz