diff options
author | Mihai Andrei <mihai.andrei@10gen.com> | 2020-12-01 12:10:53 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-01-04 19:11:12 +0000 |
commit | e490b04a5e2511e042660c1e1d60b0636f5a14bd (patch) | |
tree | de2eb0435c2ee82294140d776592dd62644a55dc | |
parent | 37e81074de8577873b515f7e7ba9ef1fbc40a93a (diff) | |
download | mongo-e490b04a5e2511e042660c1e1d60b0636f5a14bd.tar.gz |
SERVER-51886 $lookup + $merge pipeline may fail to resolve views correctly when collection names collide
(cherry picked from commit 045829020fb4a3972b316b1ad60e9ed2f285b092)
(cherry picked from commit dcd06945691b7cdd520b4c28428d941ee685104d)
-rw-r--r-- | jstests/aggregation/view_resolution_namespace_collision.js | 54 | ||||
-rw-r--r-- | src/mongo/db/commands/run_aggregate.cpp | 55 | ||||
-rw-r--r-- | src/mongo/s/query/cluster_aggregate.cpp | 16 |
3 files changed, 105 insertions, 20 deletions
diff --git a/jstests/aggregation/view_resolution_namespace_collision.js b/jstests/aggregation/view_resolution_namespace_collision.js new file mode 100644 index 00000000000..026874a1179 --- /dev/null +++ b/jstests/aggregation/view_resolution_namespace_collision.js @@ -0,0 +1,54 @@ +/** + * Tests that view resolution works correctly when a cross-database aggregation targets a + * collection and a view with the same name on different databases. + * + * @tags: [assumes_no_implicit_collection_creation_after_drop] + */ +(function() { +"use strict"; + +const dbName = "test"; +const testDB = db.getSiblingDB(dbName); +const siblingDBName = "otherDB"; +const siblingDB = testDB.getSiblingDB(siblingDBName); + +// Since cross-db $merge only works against existing databases in a sharded environment, we create a +// dummy collection on 'siblingDB' to allow this test to run in a sharded environment. +assert.commandWorked(siblingDB.foo.insert({})); + +const sourceName = jsTestName(); +const otherCollName = jsTestName() + "_other_coll"; +const collidingName = "collision_collection"; +const sourceColl = testDB[sourceName]; +const otherColl = testDB[otherCollName]; + +sourceColl.drop(); +otherColl.drop(); +testDB[collidingName].drop(); + +assert.commandWorked(sourceColl.insert({_id: 0})); +assert.commandWorked(otherColl.insert({_id: 0, notsecret: 1, secret: "really secret"})); + +// Create a view on 'testDB' that will have the same name as the collection that $merge will create. +assert.commandWorked(testDB.runCommand( + {create: collidingName, viewOn: otherCollName, pipeline: [{$project: {secret: 0}}]})); + +// Verify that the view gets resolved correctly when performing a cross database aggregation where +// the $lookup references a view on the source database and the $merge references a collection on +// the target database with the same name as the view. +const lookupStage = { + $lookup: {from: collidingName, localField: "_id", foreignField: "_id", as: "matches"} +}; +const withoutWrite = sourceColl.aggregate([lookupStage]).toArray(); +const mergeStage = { + $merge: {into: {db: siblingDBName, coll: collidingName}} +}; + +// The aggregate should always use the view on 'testDB', not an empty collection on 'siblingDB'. +sourceColl.aggregate([lookupStage, mergeStage]).toArray(); +const withWrite = siblingDB[collidingName].find().toArray(); +assert.eq(withoutWrite, withWrite); +siblingDB[collidingName].drop(); + +siblingDB.dropDatabase(); +})();
\ No newline at end of file diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp index d24bac8cb9a..915c827e11e 100644 --- a/src/mongo/db/commands/run_aggregate.cpp +++ b/src/mongo/db/commands/run_aggregate.cpp @@ -291,29 +291,17 @@ StatusWith<StringMap<ExpressionContext::ResolvedNamespace>> resolveInvolvedNames continue; } - if (involvedNs.db() != request.getNamespaceString().db()) { - // If the involved namespace is not in the same database as the aggregation, it must be - // from a $merge to a collection in a different database. Since we cannot write to - // views, simply assume that the namespace is a collection. - resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}}; - } else if (!db || db->getCollection(opCtx, involvedNs)) { - // If the aggregation database exists and 'involvedNs' refers to a collection namespace, - // then we resolve it as an empty pipeline in order to read directly from the underlying - // collection. If the database doesn't exist, then we still resolve it as an empty - // pipeline because 'involvedNs' doesn't refer to a view namespace in our consistent - // snapshot of the view catalog. - resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}}; - } else if (viewCatalog->lookup(opCtx, involvedNs.ns())) { - // If 'involvedNs' refers to a view namespace, then we resolve its definition. - auto resolvedView = viewCatalog->resolveView(opCtx, involvedNs); + // If 'ns' refers to a view namespace, then we resolve its definition. + auto resolveViewDefinition = [&](const NamespaceString& ns) -> Status { + auto resolvedView = viewCatalog->resolveView(opCtx, ns); if (!resolvedView.isOK()) { return {ErrorCodes::FailedToParse, str::stream() << "Failed to resolve view '" << involvedNs.ns() << "': " << resolvedView.getStatus().toString()}; } - resolvedNamespaces[involvedNs.coll()] = {resolvedView.getValue().getNamespace(), - resolvedView.getValue().getPipeline()}; + resolvedNamespaces[ns.coll()] = {resolvedView.getValue().getNamespace(), + resolvedView.getValue().getPipeline()}; // We parse the pipeline corresponding to the resolved view in case we must resolve // other view namespaces that are also involved. @@ -325,6 +313,39 @@ StatusWith<StringMap<ExpressionContext::ResolvedNamespace>> resolveInvolvedNames involvedNamespacesQueue.insert(involvedNamespacesQueue.end(), resolvedViewInvolvedNamespaces.begin(), resolvedViewInvolvedNamespaces.end()); + return Status::OK(); + }; + + // If the involved namespace is not in the same database as the aggregation, it must be + // from a $merge to a collection in a different database. + if (involvedNs.db() != request.getNamespaceString().db()) { + // SERVER-51886: It is not correct to assume that we are reading from a collection + // because the collection targeted by $out/$merge on a given database can have the same + // name as a view on the source database. As such, we determine whether the collection + // name references a view on the aggregation request's database. Note that the inverse + // scenario (mistaking a view for a collection) is not an issue because $merge/$out + // cannot target a view. + auto nssToCheck = NamespaceString(request.getNamespaceString().db(), involvedNs.coll()); + if (viewCatalog && viewCatalog->lookup(opCtx, nssToCheck.ns())) { + auto status = resolveViewDefinition(nssToCheck); + if (!status.isOK()) { + return status; + } + } else { + resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}}; + } + } else if (!db || CollectionCatalog::get(opCtx).lookupCollectionByNamespace(involvedNs)) { + // If the aggregation database exists and 'involvedNs' refers to a collection namespace, + // then we resolve it as an empty pipeline in order to read directly from the underlying + // collection. If the database doesn't exist, then we still resolve it as an empty + // pipeline because 'involvedNs' doesn't refer to a view namespace in our consistent + // snapshot of the view catalog. + resolvedNamespaces[involvedNs.coll()] = {involvedNs, std::vector<BSONObj>{}}; + } else if (viewCatalog->lookup(opCtx, involvedNs.ns())) { + auto status = resolveViewDefinition(involvedNs); + if (!status.isOK()) { + return status; + } } else { // 'involvedNs' is neither a view nor a collection, so resolve it as an empty pipeline // to treat it as reading from a non-existent collection. diff --git a/src/mongo/s/query/cluster_aggregate.cpp b/src/mongo/s/query/cluster_aggregate.cpp index e06d49d3850..bc4b9191c90 100644 --- a/src/mongo/s/query/cluster_aggregate.cpp +++ b/src/mongo/s/query/cluster_aggregate.cpp @@ -566,10 +566,19 @@ ShardId pickMergingShard(OperationContext* opCtx, // definition. It's okay that this is incorrect, we will repopulate the real namespace map on the // mongod. Note that this function must be called before forwarding an aggregation command on an // unsharded collection, in order to verify that the involved namespaces are allowed to be sharded. -auto resolveInvolvedNamespaces(OperationContext* opCtx, const LiteParsedPipeline& litePipe) { +auto resolveInvolvedNamespaces(OperationContext* opCtx, + const LiteParsedPipeline& litePipe, + const NamespaceString& requestNss) { StringMap<ExpressionContext::ResolvedNamespace> resolvedNamespaces; for (auto&& nss : litePipe.getInvolvedNamespaces()) { - resolvedNamespaces.try_emplace(nss.coll(), nss, std::vector<BSONObj>{}); + // SERVER-51886: Only add namespaces that are in the same database as the original request. + // This is done to avoid namespace collisions for cross-database $merge operations that + // read from and write to collections that have the same name. Note that we don't have to + // verify whether the target collection is sharded as $merge can output to sharded + // collections. The output namespace will never need to be resolved as we cannot write to + // views. + if (requestNss.db() == nss.db()) + resolvedNamespaces.try_emplace(nss.coll(), nss, std::vector<BSONObj>{}); } return resolvedNamespaces; } @@ -801,7 +810,8 @@ Status ClusterAggregate::runAggregate(OperationContext* opCtx, // If we don't have a routing table, then this is a $changeStream which must run on all shards. invariant(routingInfo || (mustRunOnAll && litePipe.hasChangeStream())); - auto resolvedNamespaces = resolveInvolvedNamespaces(opCtx, litePipe); + auto resolvedNamespaces = + resolveInvolvedNamespaces(opCtx, litePipe, request.getNamespaceString()); // A pipeline is allowed to passthrough to the primary shard iff the following conditions are // met: |