From a4d3270268d55c3f4839b8944a4d3eb47543e0fd Mon Sep 17 00:00:00 2001 From: Nicholas Zolnierz Date: Thu, 17 Oct 2019 15:29:01 +0000 Subject: SERVER-42916 Reject non-inline mapReduce to an existing sharded collection that is not sharded by _id --- ...harding_last_stable_mongos_and_mixed_shards.yml | 1 + .../suites/sharding_map_reduce_agg.yaml | 1 + .../map_reduce_invalid_output_collection.js | 140 +++++++++++++++++++++ src/mongo/db/commands/mr_common.cpp | 15 ++- src/mongo/s/commands/cluster_map_reduce.cpp | 5 +- 5 files changed, 156 insertions(+), 6 deletions(-) create mode 100644 jstests/sharding/map_reduce_invalid_output_collection.js diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml index 168c801dbb7..f3317e6811b 100644 --- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml +++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml @@ -31,6 +31,7 @@ selector: - jstests/sharding/explain_exec_stats_on_shards.js - jstests/sharding/extract_shard_key_values.js - jstests/sharding/features1.js + - jstests/sharding/map_reduce_invalid_output_collection.js - jstests/sharding/mongos_query_comment.js - jstests/sharding/prefix_shard_key.js - jstests/sharding/refine_collection_shard_key_basic.js diff --git a/buildscripts/resmokeconfig/suites/sharding_map_reduce_agg.yaml b/buildscripts/resmokeconfig/suites/sharding_map_reduce_agg.yaml index e54f21831a9..175efe9f381 100644 --- a/buildscripts/resmokeconfig/suites/sharding_map_reduce_agg.yaml +++ b/buildscripts/resmokeconfig/suites/sharding_map_reduce_agg.yaml @@ -9,6 +9,7 @@ selector: - jstests/sharding/authmr.js - jstests/sharding/causal_consistency_shell_support.js - jstests/sharding/localhostAuthBypass.js + - jstests/sharding/map_reduce_invalid_output_collection.js - jstests/sharding/map_reduce_invalid_result_set.js - jstests/sharding/max_time_ms_sharded.js - jstests/sharding/mr_and_agg_versioning.js diff --git a/jstests/sharding/map_reduce_invalid_output_collection.js b/jstests/sharding/map_reduce_invalid_output_collection.js new file mode 100644 index 00000000000..51d62d1632c --- /dev/null +++ b/jstests/sharding/map_reduce_invalid_output_collection.js @@ -0,0 +1,140 @@ +// Test that mapReduce correctly fails if the target collection is not unsharded or sharded by just +// _id. +(function() { +"use strict"; + +const st = new ShardingTest({shards: 2, mongos: 2}); + +const dbName = jsTest.name(); +const nsString = dbName + ".coll"; +const numDocs = 50000; +const numKeys = 1000; + +assert.commandWorked(st.s.adminCommand({enableSharding: dbName})); +st.ensurePrimaryShard(dbName, st.shard0.shardName); +assert.commandWorked(st.s.adminCommand({shardCollection: nsString, key: {key: 1}})); + +// Load chunk data through the stale mongos before moving a chunk. +const staleMongos1 = st.s1; +staleMongos1.getCollection(nsString).find().itcount(); + +assert.commandWorked(st.s.adminCommand({split: nsString, middle: {key: numKeys / 2}})); +assert.commandWorked( + st.s.adminCommand({moveChunk: nsString, find: {key: 0}, to: st.shard1.shardName})); + +const bulk = st.s.getCollection(nsString).initializeUnorderedBulkOp(); +for (let i = 0; i < numDocs; i++) { + bulk.insert({_id: i, key: (i % numKeys), value: i % numKeys}); +} +assert.commandWorked(bulk.execute()); + +const map = function() { + emit(this.key, this.value); +}; +const reduce = function(k, values) { + let total = 0; + for (let i = 0; i < values.length; i++) { + total += values[i]; + } + return total; +}; + +// Create and shard the output collection, with a shard key other than _id. +const outColl = "mr_out"; +assert.commandWorked( + st.s.adminCommand({shardCollection: dbName + "." + outColl, key: {not_id: 1}})); + +// Insert a document into the output collection such that it is not dropped and recreated by the +// legacy mapReduce. +assert.commandWorked(st.s.getDB(dbName).getCollection(outColl).insert({_id: -1, not_id: 0})); + +// TODO SERVER-42511 remove this once the switch to MR in agg is complete. +const usingAgg = st.getDB(dbName) + .adminCommand({getParameter: 1, internalQueryUseAggMapReduce: 1}) + .internalQueryUseAggMapReduce; +const expectedError = usingAgg ? 31313 : 31311; + +// Through the same mongos, verify that mapReduce fails since the output collection is not sharded +// by _id. +assert.commandFailedWithCode( + st.s.getDB(dbName).runCommand( + {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}}), + expectedError); + +assert.commandFailedWithCode( + st.s.getDB(dbName).runCommand( + {mapReduce: "coll", map: map, reduce: reduce, out: {reduce: outColl, sharded: true}}), + expectedError); + +// Expect a similar failure through a stale mongos. +assert.commandFailedWithCode( + staleMongos1.getDB(dbName).runCommand( + {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}}), + expectedError); + +// Mode replace is unique, since the legacy mapReduce will unconditionally drop and reshard the +// target collection on _id. +if (usingAgg) { + assert.commandFailedWithCode( + st.s.getDB(dbName).runCommand( + {mapReduce: "coll", map: map, reduce: reduce, out: {replace: outColl, sharded: true}}), + expectedError); +} else { + assert.commandWorked(st.s.getDB(dbName).runCommand( + {mapReduce: "coll", map: map, reduce: reduce, out: {replace: outColl, sharded: true}})); +} + +function testAgainstValidShardedOutput(shardKey) { + // Drop and reshard the target collection. + st.s.getDB(dbName).getCollection(outColl).drop(); + assert.commandWorked( + st.s.adminCommand({shardCollection: dbName + "." + outColl, key: shardKey})); + + // Insert a document into the output collection such that it is not dropped and recreated by the + // legacy mapReduce. + assert.commandWorked(st.s.getDB(dbName).getCollection(outColl).insert({_id: -1})); + + // Test that mapReduce succeeds since the target collection is sharded by _id. + assert.commandWorked(st.s.getDB(dbName).runCommand( + {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}})); + + // Run the same mapReduce through a stale mongos and expect it to pass as well. + assert.commandWorked(st.s.getDB(dbName).getCollection(outColl).remove({})); + assert.commandWorked(staleMongos1.getDB(dbName).runCommand( + {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}})); +} + +// Test against output collection sharded by {_id: 1}. +testAgainstValidShardedOutput({_id: 1}); + +// Test against output collection sharded by {_id: "hashed"}. +testAgainstValidShardedOutput({_id: "hashed"}); + +// Test that MR fails if the output collection is sharded by a compound key including _id. +(function testCompoundShardKey() { + // Drop and reshard the target collection. + st.s.getDB(dbName).getCollection(outColl).drop(); + assert.commandWorked( + st.s.adminCommand({shardCollection: dbName + "." + outColl, key: {_id: 1, a: 1}})); + + // Insert a document into the output collection such that it is not dropped and recreated by the + // legacy mapReduce. + assert.commandWorked(st.s.getDB(dbName).getCollection(outColl).insert({_id: -1, a: 1})); + + // Test that mapReduce succeeds since the target collection is not sharded by only _id. + assert.commandFailedWithCode( + st.s.getDB(dbName).runCommand( + {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}}), + expectedError); + + // Run the same mapReduce through a stale mongos and expect it to fail as well. Make sure to + // leave at least one document in the target collection for the same reason as above. + assert.commandWorked(st.s.getDB(dbName).getCollection(outColl).remove({_id: {$gt: 0}})); + assert.commandFailedWithCode( + staleMongos1.getDB(dbName).runCommand( + {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}}), + expectedError); +})(); + +st.stop(); +})(); diff --git a/src/mongo/db/commands/mr_common.cpp b/src/mongo/db/commands/mr_common.cpp index 1621ac716cd..eab02894609 100644 --- a/src/mongo/db/commands/mr_common.cpp +++ b/src/mongo/db/commands/mr_common.cpp @@ -316,11 +316,22 @@ std::unique_ptr translateFromMR( if (outType == OutputType::Merge || outType == OutputType::Reduce) { uassert(ErrorCodes::InvalidOptions, "Source collection cannot be the same as destination collection in MapReduce when " - "using merge or " - "reduce actions", + "using merge or reduce actions", inNss != outNss); } + // If non-inline output, verify that the target collection is *not* sharded by anything other + // than _id. + if (outType != OutputType::InMemory) { + auto [shardKey, targetCollectionVersion] = + expCtx->mongoProcessInterface->ensureFieldsUniqueOrResolveDocumentKey( + expCtx, boost::none, boost::none, outNss); + uassert(31313, + "The mapReduce target collection must either be unsharded or sharded by {_id: 1} " + "or {_id: 'hashed'}", + shardKey == std::set{FieldPath("_id"s)}); + } + // TODO: It would be good to figure out what kind of errors this would produce in the Status. // It would be better not to produce something incomprehensible out of an internal translation. return uassertStatusOK(Pipeline::create( diff --git a/src/mongo/s/commands/cluster_map_reduce.cpp b/src/mongo/s/commands/cluster_map_reduce.cpp index 9ae34eca3c5..0eadc1b8ee3 100644 --- a/src/mongo/s/commands/cluster_map_reduce.cpp +++ b/src/mongo/s/commands/cluster_map_reduce.cpp @@ -671,10 +671,7 @@ bool runMapReduce(OperationContext* opCtx, outputRoutingInfo.cm()); } - if (!ok) { - errmsg = str::stream() << "MR post processing failed: " << singleResult.toString(); - return false; - } + uassert(31311, str::stream() << "MR post processing failed: " << singleResult.toString(), ok); // copy some elements from a single result // annoying that we have to copy all results for inline, but no way around it -- cgit v1.2.1