summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMihai Andrei <mihai.andrei@10gen.com>2020-12-01 12:10:53 -0500
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-01-04 19:11:12 +0000
commite490b04a5e2511e042660c1e1d60b0636f5a14bd (patch)
treede2eb0435c2ee82294140d776592dd62644a55dc
parent37e81074de8577873b515f7e7ba9ef1fbc40a93a (diff)
downloadmongo-e490b04a5e2511e042660c1e1d60b0636f5a14bd.tar.gz
SERVER-51886 $lookup + $merge pipeline may fail to resolve views correctly when collection names collide
(cherry picked from commit 045829020fb4a3972b316b1ad60e9ed2f285b092) (cherry picked from commit dcd06945691b7cdd520b4c28428d941ee685104d)
-rw-r--r--jstests/aggregation/view_resolution_namespace_collision.js54
-rw-r--r--src/mongo/db/commands/run_aggregate.cpp55
-rw-r--r--src/mongo/s/query/cluster_aggregate.cpp16
3 files changed, 105 insertions, 20 deletions
diff --git a/jstests/aggregation/view_resolution_namespace_collision.js b/jstests/aggregation/view_resolution_namespace_collision.js
new file mode 100644
index 00000000000..026874a1179
--- /dev/null
+++ b/jstests/aggregation/view_resolution_namespace_collision.js
@@ -0,0 +1,54 @@
+/**
+ * Tests that view resolution works correctly when a cross-database aggregation targets a
+ * collection and a view with the same name on different databases.
+ *
+ * @tags: [assumes_no_implicit_collection_creation_after_drop]
+ */
+(function() {
+"use strict";
+
+const dbName = "test";
+const testDB = db.getSiblingDB(dbName);
+const siblingDBName = "otherDB";
+const siblingDB = testDB.getSiblingDB(siblingDBName);
+
+// Since cross-db $merge only works against existing databases in a sharded environment, we create a
+// dummy collection on 'siblingDB' to allow this test to run in a sharded environment.
+assert.commandWorked(siblingDB.foo.insert({}));
+
+const sourceName = jsTestName();
+const otherCollName = jsTestName() + "_other_coll";
+const collidingName = "collision_collection";
+const sourceColl = testDB[sourceName];
+const otherColl = testDB[otherCollName];
+
+sourceColl.drop();
+otherColl.drop();
+testDB[collidingName].drop();
+
+assert.commandWorked(sourceColl.insert({_id: 0}));
+assert.commandWorked(otherColl.insert({_id: 0, notsecret: 1, secret: "really secret"}));
+
+// Create a view on 'testDB' that will have the same name as the collection that $merge will create.
+assert.commandWorked(testDB.runCommand(
+ {create: collidingName, viewOn: otherCollName, pipeline: [{$project: {secret: 0}}]}));
+
+// Verify that the view gets resolved correctly when performing a cross database aggregation where
+// the $lookup references a view on the source database and the $merge references a collection on
+// the target database with the same name as the view.
+const lookupStage = {
+ $lookup: {from: collidingName, localField: "_id", foreignField: "_id", as: "matches"}
+};
+const withoutWrite = sourceColl.aggregate([lookupStage]).toArray();
+const mergeStage = {
+ $merge: {into: {db: siblingDBName, coll: collidingName}}
+};
+
+// The aggregate should always use the view on 'testDB', not an empty collection on 'siblingDB'.
+sourceColl.aggregate([lookupStage, mergeStage]).toArray();
+const withWrite = siblingDB[collidingName].find().toArray();
+assert.eq(withoutWrite, withWrite);
+siblingDB[collidingName].drop();
+
+siblingDB.dropDatabase();
+})(); \ No newline at end of file
diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp
index d24bac8cb9a..915c827e11e 100644
--- a/src/mongo/db/commands/run_aggregate.cpp
+++ b/src/mongo/db/commands/run_aggregate.cpp
@@ -291,29 +291,17 @@ StatusWith<StringMap<ExpressionContext::ResolvedNamespace>> resolveInvolvedNames
continue;
}
- if (involvedNs.db() != request.getNamespaceString().db()) {
- // If the involved namespace is not in the same database as the aggregation, it must be
- // from a $merge to a collection in a different database. Since we cannot write to
- // views, simply assume that the namespace is a collection.
- resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}};
- } else if (!db || db->getCollection(opCtx, involvedNs)) {
- // If the aggregation database exists and 'involvedNs' refers to a collection namespace,
- // then we resolve it as an empty pipeline in order to read directly from the underlying
- // collection. If the database doesn't exist, then we still resolve it as an empty
- // pipeline because 'involvedNs' doesn't refer to a view namespace in our consistent
- // snapshot of the view catalog.
- resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}};
- } else if (viewCatalog->lookup(opCtx, involvedNs.ns())) {
- // If 'involvedNs' refers to a view namespace, then we resolve its definition.
- auto resolvedView = viewCatalog->resolveView(opCtx, involvedNs);
+ // If 'ns' refers to a view namespace, then we resolve its definition.
+ auto resolveViewDefinition = [&](const NamespaceString& ns) -> Status {
+ auto resolvedView = viewCatalog->resolveView(opCtx, ns);
if (!resolvedView.isOK()) {
return {ErrorCodes::FailedToParse,
str::stream() << "Failed to resolve view '" << involvedNs.ns()
<< "': " << resolvedView.getStatus().toString()};
}
- resolvedNamespaces[involvedNs.coll()] = {resolvedView.getValue().getNamespace(),
- resolvedView.getValue().getPipeline()};
+ resolvedNamespaces[ns.coll()] = {resolvedView.getValue().getNamespace(),
+ resolvedView.getValue().getPipeline()};
// We parse the pipeline corresponding to the resolved view in case we must resolve
// other view namespaces that are also involved.
@@ -325,6 +313,39 @@ StatusWith<StringMap<ExpressionContext::ResolvedNamespace>> resolveInvolvedNames
involvedNamespacesQueue.insert(involvedNamespacesQueue.end(),
resolvedViewInvolvedNamespaces.begin(),
resolvedViewInvolvedNamespaces.end());
+ return Status::OK();
+ };
+
+ // If the involved namespace is not in the same database as the aggregation, it must be
+ // from a $merge to a collection in a different database.
+ if (involvedNs.db() != request.getNamespaceString().db()) {
+ // SERVER-51886: It is not correct to assume that we are reading from a collection
+ // because the collection targeted by $out/$merge on a given database can have the same
+ // name as a view on the source database. As such, we determine whether the collection
+ // name references a view on the aggregation request's database. Note that the inverse
+ // scenario (mistaking a view for a collection) is not an issue because $merge/$out
+ // cannot target a view.
+ auto nssToCheck = NamespaceString(request.getNamespaceString().db(), involvedNs.coll());
+ if (viewCatalog && viewCatalog->lookup(opCtx, nssToCheck.ns())) {
+ auto status = resolveViewDefinition(nssToCheck);
+ if (!status.isOK()) {
+ return status;
+ }
+ } else {
+ resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}};
+ }
+ } else if (!db || CollectionCatalog::get(opCtx).lookupCollectionByNamespace(involvedNs)) {
+ // If the aggregation database exists and 'involvedNs' refers to a collection namespace,
+ // then we resolve it as an empty pipeline in order to read directly from the underlying
+ // collection. If the database doesn't exist, then we still resolve it as an empty
+ // pipeline because 'involvedNs' doesn't refer to a view namespace in our consistent
+ // snapshot of the view catalog.
+ resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}};
+ } else if (viewCatalog->lookup(opCtx, involvedNs.ns())) {
+ auto status = resolveViewDefinition(involvedNs);
+ if (!status.isOK()) {
+ return status;
+ }
} else {
// 'involvedNs' is neither a view nor a collection, so resolve it as an empty pipeline
// to treat it as reading from a non-existent collection.
diff --git a/src/mongo/s/query/cluster_aggregate.cpp b/src/mongo/s/query/cluster_aggregate.cpp
index e06d49d3850..bc4b9191c90 100644
--- a/src/mongo/s/query/cluster_aggregate.cpp
+++ b/src/mongo/s/query/cluster_aggregate.cpp
@@ -566,10 +566,19 @@ ShardId pickMergingShard(OperationContext* opCtx,
// definition. It's okay that this is incorrect, we will repopulate the real namespace map on the
// mongod. Note that this function must be called before forwarding an aggregation command on an
// unsharded collection, in order to verify that the involved namespaces are allowed to be sharded.
-auto resolveInvolvedNamespaces(OperationContext* opCtx, const LiteParsedPipeline& litePipe) {
+auto resolveInvolvedNamespaces(OperationContext* opCtx,
+ const LiteParsedPipeline& litePipe,
+ const NamespaceString& requestNss) {
StringMap<ExpressionContext::ResolvedNamespace> resolvedNamespaces;
for (auto&& nss : litePipe.getInvolvedNamespaces()) {
- resolvedNamespaces.try_emplace(nss.coll(), nss, std::vector<BSONObj>{});
+ // SERVER-51886: Only add namespaces that are in the same database as the original request.
+ // This is done to avoid namespace collisions for cross-database $merge operations that
+ // read from and write to collections that have the same name. Note that we don't have to
+ // verify whether the target collection is sharded as $merge can output to sharded
+ // collections. The output namespace will never need to be resolved as we cannot write to
+ // views.
+ if (requestNss.db() == nss.db())
+ resolvedNamespaces.try_emplace(nss.coll(), nss, std::vector<BSONObj>{});
}
return resolvedNamespaces;
}
@@ -801,7 +810,8 @@ Status ClusterAggregate::runAggregate(OperationContext* opCtx,
// If we don't have a routing table, then this is a $changeStream which must run on all shards.
invariant(routingInfo || (mustRunOnAll && litePipe.hasChangeStream()));
- auto resolvedNamespaces = resolveInvolvedNamespaces(opCtx, litePipe);
+ auto resolvedNamespaces =
+ resolveInvolvedNamespaces(opCtx, litePipe, request.getNamespaceString());
// A pipeline is allowed to passthrough to the primary shard iff the following conditions are
// met: