author     Nicholas Zolnierz <nicholas.zolnierz@mongodb.com>  2019-10-17 15:29:01 +0000
committer  evergreen <evergreen@mongodb.com>                  2019-10-17 15:29:01 +0000
commit     a4d3270268d55c3f4839b8944a4d3eb47543e0fd
tree       f8302b0ecf9dd5d6c3f1d0b86f1189143685e03e
parent     65decc93b23acf373d33d7453bb2ba745a6ef319
download   mongo-a4d3270268d55c3f4839b8944a4d3eb47543e0fd.tar.gz
SERVER-42916 Reject non-inline mapReduce to an existing sharded collection that is not sharded by _id
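In short: after this patch, a non-inline mapReduce targeting an existing sharded
collection is rejected unless that collection is sharded by {_id: 1} or
{_id: "hashed"}. The agg-backed implementation fails with code 31313, the legacy
mongos path with 31311. A rough shell sketch of the new behavior (the "test.mr_out"
namespace and "not_id" key are illustrative, not taken from the patch; it assumes
sharding is enabled on the "test" database and a sharded source collection "coll"
already exists):

    // Illustrative names; not part of the patch. An output collection sharded
    // by a key other than _id is now rejected for non-inline output modes.
    assert.commandWorked(db.adminCommand({shardCollection: "test.mr_out", key: {not_id: 1}}));
    assert.commandFailedWithCode(db.getSiblingDB("test").runCommand({
        mapReduce: "coll",
        map: function() { emit(this.key, this.value); },
        reduce: function(key, values) { return Array.sum(values); },
        out: {merge: "mr_out", sharded: true}
    }), [31311, 31313]);  // 31311 on the legacy path, 31313 on the agg-backed path

    // A target sharded by {_id: 1} or {_id: "hashed"} is still accepted.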
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml |   1
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_map_reduce_agg.yml                      |   1
-rw-r--r--  jstests/sharding/map_reduce_invalid_output_collection.js                           | 140
-rw-r--r--  src/mongo/db/commands/mr_common.cpp                                                |  15
-rw-r--r--  src/mongo/s/commands/cluster_map_reduce.cpp                                        |   5
5 files changed, 156 insertions(+), 6 deletions(-)
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index 168c801dbb7..f3317e6811b 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -31,6 +31,7 @@ selector:
- jstests/sharding/explain_exec_stats_on_shards.js
- jstests/sharding/extract_shard_key_values.js
- jstests/sharding/features1.js
+ - jstests/sharding/map_reduce_invalid_output_collection.js
- jstests/sharding/mongos_query_comment.js
- jstests/sharding/prefix_shard_key.js
- jstests/sharding/refine_collection_shard_key_basic.js
diff --git a/buildscripts/resmokeconfig/suites/sharding_map_reduce_agg.yml b/buildscripts/resmokeconfig/suites/sharding_map_reduce_agg.yml
index e54f21831a9..175efe9f381 100644
--- a/buildscripts/resmokeconfig/suites/sharding_map_reduce_agg.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_map_reduce_agg.yml
@@ -9,6 +9,7 @@ selector:
- jstests/sharding/authmr.js
- jstests/sharding/causal_consistency_shell_support.js
- jstests/sharding/localhostAuthBypass.js
+ - jstests/sharding/map_reduce_invalid_output_collection.js
- jstests/sharding/map_reduce_invalid_result_set.js
- jstests/sharding/max_time_ms_sharded.js
- jstests/sharding/mr_and_agg_versioning.js
diff --git a/jstests/sharding/map_reduce_invalid_output_collection.js b/jstests/sharding/map_reduce_invalid_output_collection.js
new file mode 100644
index 00000000000..51d62d1632c
--- /dev/null
+++ b/jstests/sharding/map_reduce_invalid_output_collection.js
@@ -0,0 +1,140 @@
+// Test that mapReduce correctly fails if the target collection is neither unsharded nor sharded
+// by just _id.
+(function() {
+"use strict";
+
+const st = new ShardingTest({shards: 2, mongos: 2});
+
+const dbName = jsTest.name();
+const nsString = dbName + ".coll";
+const numDocs = 50000;
+const numKeys = 1000;
+
+assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
+st.ensurePrimaryShard(dbName, st.shard0.shardName);
+assert.commandWorked(st.s.adminCommand({shardCollection: nsString, key: {key: 1}}));
+
+// Load chunk data through the stale mongos before moving a chunk.
+const staleMongos1 = st.s1;
+staleMongos1.getCollection(nsString).find().itcount();
+
+assert.commandWorked(st.s.adminCommand({split: nsString, middle: {key: numKeys / 2}}));
+assert.commandWorked(
+ st.s.adminCommand({moveChunk: nsString, find: {key: 0}, to: st.shard1.shardName}));
+
+const bulk = st.s.getCollection(nsString).initializeUnorderedBulkOp();
+for (let i = 0; i < numDocs; i++) {
+ bulk.insert({_id: i, key: (i % numKeys), value: i % numKeys});
+}
+assert.commandWorked(bulk.execute());
+
+const map = function() {
+ emit(this.key, this.value);
+};
+const reduce = function(k, values) {
+ let total = 0;
+ for (let i = 0; i < values.length; i++) {
+ total += values[i];
+ }
+ return total;
+};
+
+// Create and shard the output collection, with a shard key other than _id.
+const outColl = "mr_out";
+assert.commandWorked(
+ st.s.adminCommand({shardCollection: dbName + "." + outColl, key: {not_id: 1}}));
+
+// Insert a document into the output collection such that it is not dropped and recreated by the
+// legacy mapReduce.
+assert.commandWorked(st.s.getDB(dbName).getCollection(outColl).insert({_id: -1, not_id: 0}));
+
+// TODO SERVER-42511 remove this once the switch to MR in agg is complete.
+const usingAgg = st.getDB(dbName)
+ .adminCommand({getParameter: 1, internalQueryUseAggMapReduce: 1})
+ .internalQueryUseAggMapReduce;
+const expectedError = usingAgg ? 31313 : 31311;
+
+// Through the same mongos, verify that mapReduce fails since the output collection is not sharded
+// by _id.
+assert.commandFailedWithCode(
+ st.s.getDB(dbName).runCommand(
+ {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}}),
+ expectedError);
+
+assert.commandFailedWithCode(
+ st.s.getDB(dbName).runCommand(
+ {mapReduce: "coll", map: map, reduce: reduce, out: {reduce: outColl, sharded: true}}),
+ expectedError);
+
+// Expect a similar failure through a stale mongos.
+assert.commandFailedWithCode(
+ staleMongos1.getDB(dbName).runCommand(
+ {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}}),
+ expectedError);
+
+// Mode replace is unique: the legacy mapReduce unconditionally drops and reshards the target
+// collection on _id, so it succeeds where the agg-based implementation fails.
+if (usingAgg) {
+ assert.commandFailedWithCode(
+ st.s.getDB(dbName).runCommand(
+ {mapReduce: "coll", map: map, reduce: reduce, out: {replace: outColl, sharded: true}}),
+ expectedError);
+} else {
+ assert.commandWorked(st.s.getDB(dbName).runCommand(
+ {mapReduce: "coll", map: map, reduce: reduce, out: {replace: outColl, sharded: true}}));
+}
+
+function testAgainstValidShardedOutput(shardKey) {
+ // Drop and reshard the target collection.
+ st.s.getDB(dbName).getCollection(outColl).drop();
+ assert.commandWorked(
+ st.s.adminCommand({shardCollection: dbName + "." + outColl, key: shardKey}));
+
+ // Insert a document into the output collection such that it is not dropped and recreated by the
+ // legacy mapReduce.
+ assert.commandWorked(st.s.getDB(dbName).getCollection(outColl).insert({_id: -1}));
+
+ // Test that mapReduce succeeds since the target collection is sharded by _id.
+ assert.commandWorked(st.s.getDB(dbName).runCommand(
+ {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}}));
+
+ // Run the same mapReduce through a stale mongos and expect it to pass as well.
+ assert.commandWorked(st.s.getDB(dbName).getCollection(outColl).remove({}));
+ assert.commandWorked(staleMongos1.getDB(dbName).runCommand(
+ {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}}));
+}
+
+// Test against output collection sharded by {_id: 1}.
+testAgainstValidShardedOutput({_id: 1});
+
+// Test against output collection sharded by {_id: "hashed"}.
+testAgainstValidShardedOutput({_id: "hashed"});
+
+// Test that MR fails if the output collection is sharded by a compound key including _id.
+(function testCompoundShardKey() {
+ // Drop and reshard the target collection.
+ st.s.getDB(dbName).getCollection(outColl).drop();
+ assert.commandWorked(
+ st.s.adminCommand({shardCollection: dbName + "." + outColl, key: {_id: 1, a: 1}}));
+
+ // Insert a document into the output collection such that it is not dropped and recreated by the
+ // legacy mapReduce.
+ assert.commandWorked(st.s.getDB(dbName).getCollection(outColl).insert({_id: -1, a: 1}));
+
+    // Test that mapReduce fails since the target collection is not sharded by only _id.
+ assert.commandFailedWithCode(
+ st.s.getDB(dbName).runCommand(
+ {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}}),
+ expectedError);
+
+ // Run the same mapReduce through a stale mongos and expect it to fail as well. Make sure to
+ // leave at least one document in the target collection for the same reason as above.
+ assert.commandWorked(st.s.getDB(dbName).getCollection(outColl).remove({_id: {$gt: 0}}));
+ assert.commandFailedWithCode(
+ staleMongos1.getDB(dbName).runCommand(
+ {mapReduce: "coll", map: map, reduce: reduce, out: {merge: outColl, sharded: true}}),
+ expectedError);
+})();
+
+st.stop();
+})();
diff --git a/src/mongo/db/commands/mr_common.cpp b/src/mongo/db/commands/mr_common.cpp
index 1621ac716cd..eab02894609 100644
--- a/src/mongo/db/commands/mr_common.cpp
+++ b/src/mongo/db/commands/mr_common.cpp
@@ -316,11 +316,22 @@ std::unique_ptr<Pipeline, PipelineDeleter> translateFromMR(
if (outType == OutputType::Merge || outType == OutputType::Reduce) {
uassert(ErrorCodes::InvalidOptions,
"Source collection cannot be the same as destination collection in MapReduce when "
- "using merge or "
- "reduce actions",
+ "using merge or reduce actions",
inNss != outNss);
}
+ // If non-inline output, verify that the target collection is *not* sharded by anything other
+ // than _id.
+ if (outType != OutputType::InMemory) {
+ auto [shardKey, targetCollectionVersion] =
+ expCtx->mongoProcessInterface->ensureFieldsUniqueOrResolveDocumentKey(
+ expCtx, boost::none, boost::none, outNss);
+ uassert(31313,
+ "The mapReduce target collection must either be unsharded or sharded by {_id: 1} "
+ "or {_id: 'hashed'}",
+ shardKey == std::set<FieldPath>{FieldPath("_id"s)});
+ }
+
// TODO: It would be good to figure out what kind of errors this would produce in the Status.
// It would be better not to produce something incomprehensible out of an internal translation.
return uassertStatusOK(Pipeline::create(
diff --git a/src/mongo/s/commands/cluster_map_reduce.cpp b/src/mongo/s/commands/cluster_map_reduce.cpp
index 9ae34eca3c5..0eadc1b8ee3 100644
--- a/src/mongo/s/commands/cluster_map_reduce.cpp
+++ b/src/mongo/s/commands/cluster_map_reduce.cpp
@@ -671,10 +671,7 @@ bool runMapReduce(OperationContext* opCtx,
outputRoutingInfo.cm());
}
- if (!ok) {
- errmsg = str::stream() << "MR post processing failed: " << singleResult.toString();
- return false;
- }
+ uassert(31311, str::stream() << "MR post processing failed: " << singleResult.toString(), ok);
// copy some elements from a single result
// annoying that we have to copy all results for inline, but no way around it
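Note on the cluster_map_reduce.cpp hunk: a post-processing failure on the legacy
mongos path now surfaces through uassert with the stable code 31311 rather than a
bare errmsg with ok: 0, which is what allows the new jstest to match on an error
code for both implementations. A hypothetical shell-side check (map/reduce
functions as defined in the test above):

    // Hypothetical usage; "mr_out" is sharded by a non-_id key as in the test.
    assert.commandFailedWithCode(
        db.runCommand({mapReduce: "coll", map: map, reduce: reduce, out: {merge: "mr_out", sharded: true}}),
        31311);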