diff options
author | Bernard Gorman <bernard.gorman@gmail.com> | 2018-05-12 14:06:35 +0100 |
---|---|---|
committer | Bernard Gorman <bernard.gorman@gmail.com> | 2018-06-11 11:58:10 +0100 |
commit | 0dd1fc7ddde2a489558f5328dce5125bddfb9e4d (patch) | |
tree | c7d8336ff21e411eeeb20d11c25dd7a4bbb2b76a | |
parent | c7451c0e11c2a782e9c0dabe16cbad744e4c451a (diff) | |
download | mongo-0dd1fc7ddde2a489558f5328dce5125bddfb9e4d.tar.gz |
SERVER-34971 Improve mongoS targeting for single-shard updates, and for replacement-style updates when the shard key includes _id
-rw-r--r-- | buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml | 2 | ||||
-rw-r--r-- | jstests/sharding/mongos_validate_writes.js | 20 | ||||
-rw-r--r-- | jstests/sharding/stale_mongos_updates_and_removes.js | 149 | ||||
-rw-r--r-- | jstests/sharding/update_replace_id.js | 166 | ||||
-rw-r--r-- | jstests/sharding/update_sharded.js | 35 | ||||
-rw-r--r-- | src/mongo/s/shard_key_pattern.cpp | 11 | ||||
-rw-r--r-- | src/mongo/s/shard_key_pattern.h | 8 | ||||
-rw-r--r-- | src/mongo/s/write_ops/chunk_manager_targeter.cpp | 336 | ||||
-rw-r--r-- | src/mongo/s/write_ops/chunk_manager_targeter.h | 17 |
9 files changed, 535 insertions, 209 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml index 467108eaace..a12c0ad5949 100644 --- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml +++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml @@ -75,6 +75,8 @@ selector: - jstests/sharding/accurate_count_with_predicate.js # Enable when SERVER-33538 is backported. - jstests/sharding/mapReduce_outSharded_checkUUID.js + # Enable if SERVER-34971 is backported or 4.2 becomes last-stable + - jstests/sharding/update_replace_id.js executor: config: diff --git a/jstests/sharding/mongos_validate_writes.js b/jstests/sharding/mongos_validate_writes.js index 85b7dbb136f..d9114a6033f 100644 --- a/jstests/sharding/mongos_validate_writes.js +++ b/jstests/sharding/mongos_validate_writes.js @@ -21,7 +21,11 @@ assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""})); st.ensurePrimaryShard(coll.getDB().getName(), st.shard1.shardName); coll.ensureIndex({a: 1}); - assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {a: 1}})); + + // Shard the collection on {a: 1} and move one chunk to another shard. Updates need to be across + // two shards to trigger an error, otherwise they are versioned and will succeed after raising + // a StaleConfigException. + st.shardColl(coll, {a: 1}, {a: 0}, {a: 1}, coll.getDB(), true); // Let the stale mongos see the collection state staleCollA.findOne(); @@ -30,7 +34,7 @@ // Change the collection sharding state coll.drop(); coll.ensureIndex({b: 1}); - assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {b: 1}})); + st.shardColl(coll, {b: 1}, {b: 0}, {b: 1}, coll.getDB(), true); // Make sure that we can successfully insert, even though we have stale state assert.writeOK(staleCollA.insert({b: "b"})); @@ -41,7 +45,7 @@ // Change the collection sharding state coll.drop(); coll.ensureIndex({c: 1}); - assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {c: 1}})); + st.shardColl(coll, {c: 1}, {c: 0}, {c: 1}, coll.getDB(), true); // Make sure we can successfully upsert, even though we have stale state assert.writeOK(staleCollA.update({c: "c"}, {c: "c"}, true)); @@ -52,7 +56,7 @@ // Change the collection sharding state coll.drop(); coll.ensureIndex({d: 1}); - assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {d: 1}})); + st.shardColl(coll, {d: 1}, {d: 0}, {d: 1}, coll.getDB(), true); // Make sure we can successfully update, even though we have stale state assert.writeOK(coll.insert({d: "d"})); @@ -67,13 +71,9 @@ // Change the collection sharding state coll.drop(); coll.ensureIndex({e: 1}); - // Deletes need to be across two shards to trigger an error - this is probably an exceptional - // case + // Deletes need to be across two shards to trigger an error. st.ensurePrimaryShard(coll.getDB().getName(), st.shard0.shardName); - assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {e: 1}})); - assert.commandWorked(admin.runCommand({split: coll + "", middle: {e: 0}})); - assert.commandWorked( - admin.runCommand({moveChunk: coll + "", find: {e: 0}, to: st.shard1.shardName})); + st.shardColl(coll, {e: 1}, {e: 0}, {e: 1}, coll.getDB(), true); // Make sure we can successfully remove, even though we have stale state assert.writeOK(coll.insert({e: "e"})); diff --git a/jstests/sharding/stale_mongos_updates_and_removes.js b/jstests/sharding/stale_mongos_updates_and_removes.js index 65656964661..1c0e251d296 100644 --- a/jstests/sharding/stale_mongos_updates_and_removes.js +++ b/jstests/sharding/stale_mongos_updates_and_removes.js @@ -26,7 +26,7 @@ assert(staleMongos.getCollection(collNS).drop()); assert.commandWorked(staleMongos.adminCommand({shardCollection: collNS, key: {x: 1}})); - for (var i = 0; i < numShardKeys; i++) { + for (let i = 0; i < numShardKeys; i++) { assert.writeOK(staleMongos.getCollection(collNS).insert({x: i, fieldToUpdate: 0})); assert.writeOK(staleMongos.getCollection(collNS).insert({x: i, fieldToUpdate: 0})); } @@ -37,21 +37,20 @@ st.configRS.awaitLastOpCommitted(); } - // Create a new sharded collection, and split data into two chunks on different shards using the + // Create a new sharded collection, then split it into two chunks on different shards using the // stale mongos. Then use the fresh mongos to consolidate the chunks onto one of the shards. - // In the end: // staleMongos will see: // shard0: (-inf, splitPoint] // shard1: (splitPoint, inf] // freshMongos will see: // shard0: (-inf, splitPoint], (splitPoint, inf] // shard1: - function makeStaleMongosTargetMultipleShards() { + function makeStaleMongosTargetMultipleShardsWhenAllChunksAreOnOneShard() { resetCollection(); // Make sure staleMongos sees all data on first shard. - var chunk = staleMongos.getCollection("config.chunks") - .findOne({min: {x: MinKey}, max: {x: MaxKey}}); + const chunk = staleMongos.getCollection("config.chunks") + .findOne({min: {x: MinKey}, max: {x: MaxKey}}); assert(chunk.shard === st.shard0.shardName); // Make sure staleMongos sees two chunks on two different shards. @@ -66,19 +65,20 @@ {moveChunk: collNS, find: {x: 0}, to: st.shard0.shardName, _waitForDelete: true})); } - // Create a new sharded collection and move a chunk from one shard to another. In the end: + // Create a new sharded collection with a single chunk, then move that chunk from the primary + // shard to another shard using the fresh mongos. // staleMongos will see: // shard0: (-inf, inf] // shard1: // freshMongos will see: // shard0: // shard1: (-inf, inf] - function makeStaleMongosTargetSingleShard() { + function makeStaleMongosTargetOneShardWhenAllChunksAreOnAnotherShard() { resetCollection(); // Make sure staleMongos sees all data on first shard. - var chunk = staleMongos.getCollection("config.chunks") - .findOne({min: {x: MinKey}, max: {x: MaxKey}}); + const chunk = staleMongos.getCollection("config.chunks") + .findOne({min: {x: MinKey}, max: {x: MaxKey}}); assert(chunk.shard === st.shard0.shardName); // Use freshMongos to move chunk to another shard. @@ -86,11 +86,35 @@ {moveChunk: collNS, find: {x: 0}, to: st.shard1.shardName, _waitForDelete: true})); } + // Create a new sharded collection, then split it into two chunks on different shards using the + // fresh mongos. + // staleMongos will see: + // shard0: (-inf, inf] + // shard1: + // freshMongos will see: + // shard0: (-inf, splitPoint] + // shard1: (splitPoint, inf] + function makeStaleMongosTargetOneShardWhenChunksAreOnMultipleShards() { + resetCollection(); + + // Make sure staleMongos sees all data on first shard. + const chunk = staleMongos.getCollection("config.chunks") + .findOne({min: {x: MinKey}, max: {x: MaxKey}}); + assert(chunk.shard === st.shard0.shardName); + + // Use freshMongos to split and move chunks to both shards. + assert.commandWorked(freshMongos.adminCommand({split: collNS, middle: {x: splitPoint}})); + assert.commandWorked(freshMongos.adminCommand( + {moveChunk: collNS, find: {x: 0}, to: st.shard1.shardName, _waitForDelete: true})); + + st.configRS.awaitLastOpCommitted(); + } + function checkAllRemoveQueries(makeMongosStaleFunc) { - var multi = {justOne: false}; - var single = {justOne: true}; + const multi = {justOne: false}; + const single = {justOne: true}; - var doRemove = function(query, multiOption, makeMongosStaleFunc) { + function doRemove(query, multiOption, makeMongosStaleFunc) { makeMongosStaleFunc(); assert.writeOK(staleMongos.getCollection(collNS).remove(query, multiOption)); if (multiOption.justOne) { @@ -100,13 +124,13 @@ // All documents matching the query should have been removed. assert.eq(0, staleMongos.getCollection(collNS).find(query).itcount()); } - }; + } - var checkRemoveIsInvalid = function(query, multiOption, makeMongosStaleFunc) { + function checkRemoveIsInvalid(query, multiOption, makeMongosStaleFunc) { makeMongosStaleFunc(); - var res = staleMongos.getCollection(collNS).remove(query, multiOption); + const res = staleMongos.getCollection(collNS).remove(query, multiOption); assert.writeError(res); - }; + } // Not possible because single remove requires equality match on shard key. checkRemoveIsInvalid(emptyQuery, single, makeMongosStaleFunc); @@ -126,14 +150,14 @@ } function checkAllUpdateQueries(makeMongosStaleFunc) { - var oUpdate = {$inc: {fieldToUpdate: 1}}; // op-style update (non-idempotent) - var rUpdate = {x: 0, fieldToUpdate: 1}; // replacement-style update (idempotent) - var queryAfterUpdate = {fieldToUpdate: 1}; + const oUpdate = {$inc: {fieldToUpdate: 1}}; // op-style update (non-idempotent) + const rUpdate = {x: 0, fieldToUpdate: 1}; // replacement-style update (idempotent) + const queryAfterUpdate = {fieldToUpdate: 1}; - var multi = {multi: true}; - var single = {multi: false}; + const multi = {multi: true}; + const single = {multi: false}; - var doUpdate = function(query, update, multiOption, makeMongosStaleFunc) { + function doUpdate(query, update, multiOption, makeMongosStaleFunc) { makeMongosStaleFunc(); assert.writeOK(staleMongos.getCollection(collNS).update(query, update, multiOption)); if (multiOption.multi) { @@ -144,83 +168,100 @@ // A total of one document should have been updated. assert.eq(1, staleMongos.getCollection(collNS).find(queryAfterUpdate).itcount()); } - }; + } - var checkUpdateIsInvalid = function(query, update, multiOption, makeMongosStaleFunc, err) { + function assertUpdateIsInvalid(query, update, multiOption, makeMongosStaleFunc) { makeMongosStaleFunc(); - var res = staleMongos.getCollection(collNS).update(query, update, multiOption); + const res = staleMongos.getCollection(collNS).update(query, update, multiOption); assert.writeError(res); - }; + } + + function assertUpdateIsValidIfAllChunksOnSingleShard( + query, update, multiOption, makeMongosStaleFunc) { + if (makeMongosStaleFunc == makeStaleMongosTargetOneShardWhenChunksAreOnMultipleShards) { + assertUpdateIsInvalid(query, update, multiOption, makeMongosStaleFunc); + } else { + doUpdate(query, update, multiOption, makeMongosStaleFunc); + } + } + + // Note on the tests below: single-doc updates are able to succeed even in cases where the + // stale mongoS incorrectly believes that the update targets multiple shards, because the + // mongoS write path swallows the first error encountered in each batch, then internally + // refreshes its routing table and tries the write again. Because all chunks are actually + // on a single shard in two of the three test cases, this second update attempt succeeds. // This update has inconsistent behavior as explained in SERVER-22895. // doUpdate(emptyQuery, rUpdate, single, makeMongosStaleFunc); // Not possible because replacement-style requires equality match on shard key. - checkUpdateIsInvalid(emptyQuery, rUpdate, multi, makeMongosStaleFunc); + assertUpdateIsInvalid(emptyQuery, rUpdate, multi, makeMongosStaleFunc); - // Not possible because op-style requires equality match on shard key if single update. - checkUpdateIsInvalid(emptyQuery, oUpdate, single, makeMongosStaleFunc); + // Single op-style update succeeds if all chunks are on one shard, regardless of staleness. + assertUpdateIsValidIfAllChunksOnSingleShard( + emptyQuery, oUpdate, single, makeMongosStaleFunc); doUpdate(emptyQuery, oUpdate, multi, makeMongosStaleFunc); doUpdate(pointQuery, rUpdate, single, makeMongosStaleFunc); // Not possible because replacement-style requires multi=false. - checkUpdateIsInvalid(pointQuery, rUpdate, multi, makeMongosStaleFunc); + assertUpdateIsInvalid(pointQuery, rUpdate, multi, makeMongosStaleFunc); doUpdate(pointQuery, oUpdate, single, makeMongosStaleFunc); doUpdate(pointQuery, oUpdate, multi, makeMongosStaleFunc); doUpdate(rangeQuery, rUpdate, single, makeMongosStaleFunc); // Not possible because replacement-style requires multi=false. - checkUpdateIsInvalid(rangeQuery, rUpdate, multi, makeMongosStaleFunc); + assertUpdateIsInvalid(rangeQuery, rUpdate, multi, makeMongosStaleFunc); - // Not possible because can't do range query on a single update. - checkUpdateIsInvalid(rangeQuery, oUpdate, single, makeMongosStaleFunc); + // Range query for a single update succeeds because the range falls entirely on one shard. + doUpdate(rangeQuery, oUpdate, single, makeMongosStaleFunc); doUpdate(rangeQuery, oUpdate, multi, makeMongosStaleFunc); doUpdate(multiPointQuery, rUpdate, single, makeMongosStaleFunc); // Not possible because replacement-style requires multi=false. - checkUpdateIsInvalid(multiPointQuery, rUpdate, multi, makeMongosStaleFunc); + assertUpdateIsInvalid(multiPointQuery, rUpdate, multi, makeMongosStaleFunc); - // Not possible because single remove must contain _id or shard key at top level (not within - // $or). - checkUpdateIsInvalid(multiPointQuery, oUpdate, single, makeMongosStaleFunc); + // Multi-point single-doc update succeeds if all points are on a single shard. + assertUpdateIsValidIfAllChunksOnSingleShard( + multiPointQuery, oUpdate, single, makeMongosStaleFunc); doUpdate(multiPointQuery, oUpdate, multi, makeMongosStaleFunc); } // TODO: SERVER-33954 remove shardAsReplicaSet: false. - var st = new ShardingTest({shards: 2, mongos: 2, other: {shardAsReplicaSet: false}}); + const st = new ShardingTest({shards: 2, mongos: 2, other: {shardAsReplicaSet: false}}); - var dbName = 'test'; - var collNS = dbName + '.foo'; - var numShardKeys = 10; - var numDocs = numShardKeys * 2; - var splitPoint = numShardKeys / 2; + const dbName = 'test'; + const collNS = dbName + '.foo'; + const numShardKeys = 10; + const numDocs = numShardKeys * 2; + const splitPoint = numShardKeys / 2; assert.commandWorked(st.s.adminCommand({enableSharding: dbName})); assert.commandWorked(st.s.adminCommand({shardCollection: collNS, key: {x: 1}})); st.ensurePrimaryShard(dbName, st.shard0.shardName); - var freshMongos = st.s0; - var staleMongos = st.s1; + const freshMongos = st.s0; + const staleMongos = st.s1; - var emptyQuery = {}; - var pointQuery = {x: 0}; + const emptyQuery = {}; + const pointQuery = {x: 0}; // Choose a range that would fall on only one shard. // Use (splitPoint - 1) because of SERVER-20768. - var rangeQuery = {x: {$gte: 0, $lt: splitPoint - 1}}; + const rangeQuery = {x: {$gte: 0, $lt: splitPoint - 1}}; // Choose points that would fall on two different shards. - var multiPointQuery = {$or: [{x: 0}, {x: numShardKeys}]}; + const multiPointQuery = {$or: [{x: 0}, {x: numShardKeys}]}; - checkAllRemoveQueries(makeStaleMongosTargetSingleShard); - checkAllRemoveQueries(makeStaleMongosTargetMultipleShards); + checkAllRemoveQueries(makeStaleMongosTargetOneShardWhenAllChunksAreOnAnotherShard); + checkAllRemoveQueries(makeStaleMongosTargetMultipleShardsWhenAllChunksAreOnOneShard); - checkAllUpdateQueries(makeStaleMongosTargetSingleShard); - checkAllUpdateQueries(makeStaleMongosTargetMultipleShards); + checkAllUpdateQueries(makeStaleMongosTargetOneShardWhenAllChunksAreOnAnotherShard); + checkAllUpdateQueries(makeStaleMongosTargetMultipleShardsWhenAllChunksAreOnOneShard); + checkAllUpdateQueries(makeStaleMongosTargetOneShardWhenChunksAreOnMultipleShards); st.stop(); })(); diff --git a/jstests/sharding/update_replace_id.js b/jstests/sharding/update_replace_id.js new file mode 100644 index 00000000000..9fe3538e4f5 --- /dev/null +++ b/jstests/sharding/update_replace_id.js @@ -0,0 +1,166 @@ +/** + * Test to confirm that mongoS's special handling of replacement updates with an exact query on _id + * behaves as expected in the case where a collection's shard key includes _id: + * + * - For update replacements, mongoS combines the _id from the query with the replacement document + * to target the query towards a single shard, rather than scattering to all shards. + * - For upsert replacements, which always require an exact shard key match, mongoS combines the _id + * from the query with the replacement document to produce a complete shard key. + * + * These special cases are allowed because mongoD always propagates the _id of an existing document + * into its replacement, and in the case of an upsert will use the value of _id from the query + * filter. + */ +(function() { + load("jstests/libs/profiler.js"); // For profilerHas*OrThrow helper functions. + + const st = new ShardingTest({shards: 2, mongos: 1, config: 1, other: {enableBalancer: false}}); + + const mongosDB = st.s0.getDB(jsTestName()); + const mongosColl = mongosDB.test; + + const shard0DB = st.shard0.getDB(jsTestName()); + const shard1DB = st.shard1.getDB(jsTestName()); + + assert.commandWorked(mongosDB.dropDatabase()); + + // Enable sharding on the test DB and ensure its primary is shard0. + assert.commandWorked(mongosDB.adminCommand({enableSharding: mongosDB.getName()})); + st.ensurePrimaryShard(mongosDB.getName(), st.shard0.shardName); + + // Enables profiling on both shards so that we can verify the targeting behaviour. + function restartProfiling() { + for (let shardDB of[shard0DB, shard1DB]) { + shardDB.setProfilingLevel(0); + shardDB.system.profile.drop(); + shardDB.setProfilingLevel(2); + } + } + + // Run the set of tests relevant to the given shardKey. + function runReplacementUpdateTests(shardKey) { + // Write a single document to shard0 and verify that it is present. + mongosColl.insert({_id: -100, a: -100, msg: "not_updated"}); + assert.docEq(shard0DB.test.find({_id: -100}).toArray(), + [{_id: -100, a: -100, msg: "not_updated"}]); + + // Write a document with the same key directly to shard1. This simulates an orphaned + // document, or the duplicate document which temporarily exists during a chunk migration. + shard1DB.test.insert({_id: -100, a: -100, msg: "not_updated"}); + + // Clear and restart the profiler on both shards. + restartProfiling(); + + // Perform a replacement update whose query is an exact match on _id and whose replacement + // document contains the remainder of the shard key. Despite the fact that the replacement + // document does not contain the entire shard key, we expect that mongoS will extract the + // _id from the query and combine it with the replacement doc to target a single shard. + let writeRes = assert.commandWorked( + mongosColl.update({_id: -100}, {a: -100, msg: "update_extracted_id_from_query"})); + + // Verify that the update did not modify the orphan document. + assert.docEq(shard1DB.test.find({_id: -100}).toArray(), + [{_id: -100, a: -100, msg: "not_updated"}]); + assert.eq(writeRes.nMatched, 1); + assert.eq(writeRes.nModified, 1); + + // Verify that the update only targeted shard0 and that the resulting document appears as + // expected. + assert.docEq(mongosColl.find({_id: -100}).toArray(), + [{_id: -100, a: -100, msg: "update_extracted_id_from_query"}]); + profilerHasSingleMatchingEntryOrThrow({ + profileDB: shard0DB, + filter: {op: "update", "command.u.msg": "update_extracted_id_from_query"} + }); + profilerHasZeroMatchingEntriesOrThrow({ + profileDB: shard1DB, + filter: {op: "update", "command.u.msg": "update_extracted_id_from_query"} + }); + + // Perform an upsert replacement whose query is an exact match on _id and whose replacement + // doc contains the remainder of the shard key. The _id taken from the query should be used + // both in targeting the update and in generating the new document. + writeRes = assert.commandWorked(mongosColl.update( + {_id: 101}, {a: 101, msg: "upsert_extracted_id_from_query"}, {upsert: true})); + assert.eq(writeRes.nUpserted, 1); + + // Verify that the update only targeted shard1, and that the resulting document appears as + // expected. At this point in the test we expect shard1 to be stale, because it was the + // destination shard for the first moveChunk; we therefore explicitly check the profiler for + // a successful update, i.e. one which did not report a stale config exception. + assert.docEq(mongosColl.find({_id: 101}).toArray(), + [{_id: 101, a: 101, msg: "upsert_extracted_id_from_query"}]); + assert.docEq(shard1DB.test.find({_id: 101}).toArray(), + [{_id: 101, a: 101, msg: "upsert_extracted_id_from_query"}]); + profilerHasZeroMatchingEntriesOrThrow({ + profileDB: shard0DB, + filter: {op: "update", "command.u.msg": "upsert_extracted_id_from_query"} + }); + profilerHasSingleMatchingEntryOrThrow({ + profileDB: shard1DB, + filter: { + op: "update", + "command.u.msg": "upsert_extracted_id_from_query", + errName: {$exists: false} + } + }); + + // The remainder of the tests are only relevant for a compound shard key. If the key is + // {_id: 'hashed'}, we stop at this point. + if (shardKey._id === "hashed") { + return; + } + + // Verify that an update whose query contains an exact match on _id but whose replacement + // doc does not contain all other shard key fields will be rejected by mongoS. + writeRes = assert.commandFailedWithCode( + mongosColl.update({_id: -100, a: -100}, {msg: "update_failed_missing_shard_key_field"}), + ErrorCodes.ShardKeyNotFound); + + // Check that the existing document remains unchanged, and that the update did not reach + // either shard per their respective profilers. + assert.docEq(mongosColl.find({_id: -100, a: -100}).toArray(), + [{_id: -100, a: -100, msg: "update_extracted_id_from_query"}]); + profilerHasZeroMatchingEntriesOrThrow({ + profileDB: shard0DB, + filter: {op: "update", "command.u.msg": "update_failed_missing_shard_key_field"} + }); + profilerHasZeroMatchingEntriesOrThrow({ + profileDB: shard1DB, + filter: {op: "update", "command.u.msg": "update_failed_missing_shard_key_field"} + }); + + // Verify that an upsert whose query contains an exact match on _id but whose replacement + // document does not contain all other shard key fields will be rejected by mongoS, since it + // does not contain an exact shard key match. + writeRes = assert.commandFailedWithCode( + mongosColl.update({_id: 200, a: 200}, {msg: "upsert_targeting_failed"}, {upsert: true}), + ErrorCodes.ShardKeyNotFound); + profilerHasZeroMatchingEntriesOrThrow({ + profileDB: shard0DB, + filter: {op: "update", "command.u.msg": "upsert_targeting_failed"} + }); + profilerHasZeroMatchingEntriesOrThrow({ + profileDB: shard1DB, + filter: {op: "update", "command.u.msg": "upsert_targeting_failed"} + }); + assert.eq(mongosColl.find({_id: 200, a: 200}).itcount(), 0); + } + + // Shard the test collection on {_id: 1, a: 1}, split it into two chunks, and migrate one of + // these to the second shard. + st.shardColl( + mongosColl, {_id: 1, a: 1}, {_id: 0, a: 0}, {_id: 1, a: 1}, mongosDB.getName(), true); + + // Run the replacement behaviour tests that are relevant to a compound key that includes _id. + runReplacementUpdateTests({_id: 1, a: 1}); + + // Drop and reshard the collection on {_id: "hashed"}, which will autosplit across both shards. + assert(mongosColl.drop()); + mongosDB.adminCommand({shardCollection: mongosColl.getFullName(), key: {_id: "hashed"}}); + + // Run the replacement behaviour tests relevant to a collection sharded on {_id: "hashed"}. + runReplacementUpdateTests({_id: "hashed"}); + + st.stop(); +})();
\ No newline at end of file diff --git a/jstests/sharding/update_sharded.js b/jstests/sharding/update_sharded.js index d08d498c613..0873152c4e0 100644 --- a/jstests/sharding/update_sharded.js +++ b/jstests/sharding/update_sharded.js @@ -2,20 +2,25 @@ // since shard key is immutable. (function() { - var s = new ShardingTest({name: "auto1", shards: 2, mongos: 1}); + const s = new ShardingTest({name: "auto1", shards: 2, mongos: 1}); s.adminCommand({enablesharding: "test"}); s.ensurePrimaryShard('test', s.shard1.shardName); - // repeat same tests with hashed shard key, to ensure identical behavior - s.adminCommand({shardcollection: "test.update0", key: {key: 1}}); + db = s.getDB("test"); + + // Repeat same tests with hashed shard key, to ensure identical behavior. + s.shardColl("update0", {key: 1}, {key: 0}, {key: 1}, db.getName(), true); s.adminCommand({shardcollection: "test.update1", key: {key: "hashed"}}); - db = s.getDB("test"); + s.shard0.getDB("admin").setLogLevel(1); + s.shard1.getDB("admin").setLogLevel(1); - for (i = 0; i < 2; i++) { - coll = db.getCollection("update" + i); + for (let i = 0; i < 2; i++) { + const collName = "update" + i; + const hashedKey = (collName == "update1"); + coll = db.getCollection(collName); coll.insert({_id: 1, key: 1}); // these are both upserts @@ -83,13 +88,21 @@ // Invalid extraction of exact key from query assert.writeError(coll.update({}, {$set: {x: 1}}, {multi: false})); - assert.writeError(coll.update({key: {$gt: ObjectId()}}, {$set: {x: 1}}, {multi: false})); - assert.writeError(coll.update( - {$or: [{key: ObjectId()}, {key: ObjectId()}]}, {$set: {x: 1}}, {multi: false})); - assert.writeError(coll.update( - {$and: [{key: ObjectId()}, {key: ObjectId()}]}, {$set: {x: 1}}, {multi: false})); assert.writeError(coll.update({'key.x': ObjectId()}, {$set: {x: 1}}, {multi: false})); + // Inexact queries may target a single shard. Range queries may target a single shard as + // long as the collection is not hashed. + assert[hashedKey ? "writeError" : "writeOK"]( + coll.update({key: {$gt: 0}}, {$set: {x: 1}}, {multi: false})); + // Note: {key:-1} and {key:-2} fall on shard0 for both hashed and ascending shardkeys. + assert.writeOK(coll.update({$or: [{key: -1}, {key: -2}]}, {$set: {x: 1}}, {multi: false})); + assert.writeOK(coll.update({$and: [{key: -1}, {key: -2}]}, {$set: {x: 1}}, {multi: false})); + + // In cases where an inexact query does target multiple shards, single update is rejected. + assert.writeError(coll.update({key: {$gt: MinKey}}, {$set: {x: 1}}, {multi: false})); + assert.writeError( + coll.update({$or: [{key: -10}, {key: 10}]}, {$set: {x: 1}}, {multi: false})); + // Make sure failed shard key or _id extraction doesn't affect the other assert.writeOK(coll.update({'_id.x': ObjectId(), key: 1}, {$set: {x: 1}}, {multi: false})); assert.writeOK(coll.update({_id: ObjectId(), 'key.x': 1}, {$set: {x: 1}}, {multi: false})); diff --git a/src/mongo/s/shard_key_pattern.cpp b/src/mongo/s/shard_key_pattern.cpp index d37df0cceb2..b0996acd647 100644 --- a/src/mongo/s/shard_key_pattern.cpp +++ b/src/mongo/s/shard_key_pattern.cpp @@ -275,6 +275,17 @@ BSONObj ShardKeyPattern::extractShardKeyFromDoc(const BSONObj& doc) const { return extractShardKeyFromMatchable(matchable); } +std::vector<StringData> ShardKeyPattern::findMissingShardKeyFieldsFromDoc(const BSONObj doc) const { + std::vector<StringData> missingFields; + BSONMatchableDocument matchable(doc); + for (const auto& skField : _keyPattern.toBSON()) { + auto matchEl = extractKeyElementFromMatchable(matchable, skField.fieldNameStringData()); + if (!isValidShardKeyElement(matchEl)) + missingFields.emplace_back(skField.fieldNameStringData()); + } + return missingFields; +} + StatusWith<BSONObj> ShardKeyPattern::extractShardKeyFromQuery(OperationContext* opCtx, const BSONObj& basicQuery) const { auto qr = stdx::make_unique<QueryRequest>(NamespaceString("")); diff --git a/src/mongo/s/shard_key_pattern.h b/src/mongo/s/shard_key_pattern.h index fe01252f61a..92fc6eea45f 100644 --- a/src/mongo/s/shard_key_pattern.h +++ b/src/mongo/s/shard_key_pattern.h @@ -145,6 +145,14 @@ public: BSONObj extractShardKeyFromDoc(const BSONObj& doc) const; /** + * Returns the set of shard key fields which are absent from the given document. Note that the + * vector returned by this method contains StringData elements pointing into ShardKeyPattern's + * underlying BSONObj. If the fieldnames are required to survive beyond the lifetime of this + * ShardKeyPattern, callers should create their own copies. + */ + std::vector<StringData> findMissingShardKeyFieldsFromDoc(const BSONObj doc) const; + + /** * Given a simple BSON query, extracts the shard key corresponding to the key pattern * from equality matches in the query. The query expression *must not* be a complex query * with sorts or other attributes. diff --git a/src/mongo/s/write_ops/chunk_manager_targeter.cpp b/src/mongo/s/write_ops/chunk_manager_targeter.cpp index d5e7b58e809..9375405adae 100644 --- a/src/mongo/s/write_ops/chunk_manager_targeter.cpp +++ b/src/mongo/s/write_ops/chunk_manager_targeter.cpp @@ -45,11 +45,13 @@ namespace mongo { namespace { -enum UpdateType { UpdateType_Replacement, UpdateType_OpStyle, UpdateType_Unknown }; - enum CompareResult { CompareResult_Unknown, CompareResult_GTE, CompareResult_LT }; -const ShardKeyPattern virtualIdShardKey(BSON("_id" << 1)); +constexpr auto kIdFieldName = "_id"_sd; + +const ShardKeyPattern kVirtualIdShardKey(BSON(kIdFieldName << 1)); + +using UpdateType = ChunkManagerTargeter::UpdateType; /** * There are two styles of update expressions: @@ -57,34 +59,84 @@ const ShardKeyPattern virtualIdShardKey(BSON("_id" << 1)); * Replacement style: coll.update({ x : 1 }, { y : 2 }) * OpStyle: coll.update({ x : 1 }, { $set : { y : 2 } }) */ -UpdateType getUpdateExprType(const BSONObj& updateExpr) { - // Empty update is replacement-style, by default - if (updateExpr.isEmpty()) { - return UpdateType_Replacement; +StatusWith<UpdateType> getUpdateExprType(const write_ops::UpdateOpEntry& updateDoc) { + // Obtain the update expression from the request. + const auto updateExpr = updateDoc.getU(); + + // Empty update is replacement-style by default. + auto updateType = (updateExpr.isEmpty() ? UpdateType::kReplacement : UpdateType::kUnknown); + + // Make sure that the update expression does not mix $op and non-$op fields. + for (const auto& curField : updateExpr) { + const auto curFieldType = + (curField.fieldNameStringData()[0] == '$' ? UpdateType::kOpStyle + : UpdateType::kReplacement); + + // If the current field's type does not match the existing updateType, abort. + if (updateType != curFieldType && updateType != UpdateType::kUnknown) { + return {ErrorCodes::UnsupportedFormat, + str::stream() << "update document " << updateExpr + << " has mixed $operator and non-$operator style fields"}; + } + updateType = curFieldType; } - UpdateType updateType = UpdateType_Unknown; + if (updateType == UpdateType::kReplacement && updateDoc.getMulti()) { + return {ErrorCodes::InvalidOptions, "Replacement-style updates cannot be {multi:true}"}; + } - BSONObjIterator it(updateExpr); - while (it.more()) { - BSONElement next = it.next(); + return updateType; +} - if (next.fieldName()[0] == '$') { - if (updateType == UpdateType_Unknown) { - updateType = UpdateType_OpStyle; - } else if (updateType == UpdateType_Replacement) { - return UpdateType_Unknown; - } - } else { - if (updateType == UpdateType_Unknown) { - updateType = UpdateType_Replacement; - } else if (updateType == UpdateType_OpStyle) { - return UpdateType_Unknown; - } - } +/** + * Obtain the update expression from the given update doc. If this is a replacement-style update, + * and the shard key includes _id but the replacement document does not, we attempt to find an exact + * _id match in the query component and add it to the doc. We do this because mongoD will propagate + * _id from the existing document if this is an update, and will extract _id from the query when + * generating the new document in the case of an upsert. It is therefore always correct to target + * the operation on the basis of the combined updateExpr and query. + */ +StatusWith<BSONObj> getUpdateExpr(OperationContext* opCtx, + const ShardKeyPattern& shardKeyPattern, + const UpdateType updateType, + const write_ops::UpdateOpEntry& updateDoc) { + // We should never see an invalid update type here. + invariant(updateType != UpdateType::kUnknown); + + // If this is not a replacement update, then the update expression remains unchanged. + if (updateType != UpdateType::kReplacement) { + return updateDoc.getU(); } - return updateType; + // Extract the raw update expression from the request. + auto updateExpr = updateDoc.getU(); + + // Find the set of all shard key fields that are missing from the update expression. + const auto missingFields = shardKeyPattern.findMissingShardKeyFieldsFromDoc(updateExpr); + + // If there are no missing fields, return the update expression as-is. + if (missingFields.empty()) { + return updateExpr; + } + // If there are any missing fields other than _id, then this update can never succeed. + if (missingFields.size() > 1 || *missingFields.begin() != kIdFieldName) { + return {ErrorCodes::ShardKeyNotFound, + str::stream() << "Expected replacement document to include all shard key fields, " + "but the following were omitted: " + << BSON("missingShardKeyFields" << missingFields)}; + } + // If the only missing field is _id, attempt to extract it from an exact match in the update's + // query spec. This will guarantee that we can target a single shard, but it is not necessarily + // fatal if no exact _id can be found. + invariant(missingFields.size() == 1 && *missingFields.begin() == kIdFieldName); + const auto idFromQuery = kVirtualIdShardKey.extractShardKeyFromQuery(opCtx, updateDoc.getQ()); + if (!idFromQuery.isOK()) { + return idFromQuery; + } else if (auto idElt = idFromQuery.getValue()[kIdFieldName]) { + updateExpr = updateExpr.addField(idElt); + } + + return updateExpr; } /** @@ -100,7 +152,7 @@ UpdateType getUpdateExprType(const BSONObj& updateExpr) { * { foo : <anything> } => false */ bool isExactIdQuery(OperationContext* opCtx, const CanonicalQuery& query, ChunkManager* manager) { - auto shardKey = virtualIdShardKey.extractShardKeyFromQuery(query); + auto shardKey = kVirtualIdShardKey.extractShardKeyFromQuery(query); BSONElement idElt = shardKey["_id"]; if (!idElt) { @@ -118,7 +170,24 @@ bool isExactIdQuery(OperationContext* opCtx, const CanonicalQuery& query, ChunkM return true; } +bool isExactIdQuery(OperationContext* opCtx, + const NamespaceString& nss, + const BSONObj query, + const BSONObj collation, + ChunkManager* manager) { + auto qr = stdx::make_unique<QueryRequest>(nss); + qr->setFilter(query); + if (!collation.isEmpty()) { + qr->setCollation(collation); + } + const auto cq = CanonicalQuery::canonicalize(opCtx, + std::move(qr), + nullptr, + ExtensionsCallbackNoop(), + MatchExpressionParser::kAllowAllSpecialFeatures); + return cq.isOK() && isExactIdQuery(opCtx, *cq.getValue(), manager); +} // // Utilities to compare shard versions // @@ -327,125 +396,74 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetUpdate( // into the query doc, and to correctly support upsert we must target a single shard. // // The rule is simple - If the update is replacement style (no '$set'), we target using the - // update. If the update is not replacement style, we target using the query. + // update. If the update is not replacement style, we target using the query. Because mongoD + // will automatically propagate '_id' from an existing document, and will extract it from an + // exact-match in the query in the case of an upsert, we augment the replacement doc with the + // query's '_id' for targeting purposes, if it exists. // - // If we have the exact shard key in either the query or replacement doc, we target using - // that extracted key. + // Once we have determined the correct component to target on, we attempt to extract an exact + // shard key from it. If one is present, we target using it. // - - BSONObj query = updateDoc.getQ(); - BSONObj updateExpr = updateDoc.getU(); - - UpdateType updateType = getUpdateExprType(updateExpr); - - if (updateType == UpdateType_Unknown) { - return {ErrorCodes::UnsupportedFormat, - str::stream() << "update document " << updateExpr - << " has mixed $operator and non-$operator style fields"}; + const auto updateType = getUpdateExprType(updateDoc); + if (!updateType.isOK()) { + return updateType.getStatus(); } - BSONObj shardKey; - - if (_routingInfo->cm()) { - // - // Sharded collections have the following futher requirements for targeting: - // - // Upserts must be targeted exactly by shard key. - // Non-multi updates must be targeted exactly by shard key *or* exact _id. - // - - // Get the shard key - if (updateType == UpdateType_OpStyle) { - // Target using the query - StatusWith<BSONObj> status = - _routingInfo->cm()->getShardKeyPattern().extractShardKeyFromQuery(opCtx, query); - - // Bad query - if (!status.isOK()) - return status.getStatus(); - - shardKey = status.getValue(); - } else { - // Target using the replacement document - shardKey = _routingInfo->cm()->getShardKeyPattern().extractShardKeyFromDoc(updateExpr); - } - - // Check shard key size on upsert. - if (updateDoc.getUpsert()) { - Status status = ShardKeyPattern::checkShardKeySize(shardKey); - if (!status.isOK()) - return status; + // If the collection is not sharded, forward the update to the primary shard. + if (!_routingInfo->cm()) { + if (!_routingInfo->db().primary()) { + return {ErrorCodes::NamespaceNotFound, + str::stream() << "could not target update on " << getNS().ns() + << "; no metadata found"}; } + return std::vector<ShardEndpoint>{ + {_routingInfo->db().primaryId(), ChunkVersion::UNSHARDED()}}; } + const auto& shardKeyPattern = _routingInfo->cm()->getShardKeyPattern(); const auto collation = write_ops::collationOf(updateDoc); - // Target the shard key, query, or replacement doc - if (!shardKey.isEmpty()) { - try { - return std::vector<ShardEndpoint>{ - _targetShardKey(shardKey, collation, (query.objsize() + updateExpr.objsize()))}; - } catch (const DBException&) { - // This update is potentially not constrained to a single shard + const auto updateExpr = getUpdateExpr(opCtx, shardKeyPattern, updateType.getValue(), updateDoc); + const bool isUpsert = updateDoc.getUpsert(); + const auto query = updateDoc.getQ(); + if (!updateExpr.isOK()) { + return updateExpr.getStatus(); + } + + // We first attempt to target by exact match on the shard key. + const auto swTarget = _targetUpdateByShardKey( + opCtx, updateType.getValue(), query, collation, updateExpr.getValue(), isUpsert); + + // Return the status if we were successful in targeting by shard key. If this is an upsert, then + // we return the status regardless of whether or not we succeeded, since an upsert must always + // target an exact shard key or fail. If this is *not* an upsert and we were unable to target an + // exact shard key, then we proceed to try other means of targeting the update. + if (isUpsert || swTarget.isOK()) { + return swTarget; + } + + // If we could not target by shard key, attempt to route the update by query or replacement doc. + auto shardEndPoints = (updateType.getValue() == UpdateType::kOpStyle + ? _targetQuery(opCtx, query, collation) + : _targetDoc(opCtx, updateExpr.getValue(), collation)); + + // Single (non-multi) updates must target a single shard, or an exact _id. + if (!updateDoc.getMulti() && shardEndPoints.isOK() && shardEndPoints.getValue().size() > 1) { + if (!isExactIdQuery(opCtx, getNS(), query, collation, _routingInfo->cm().get())) { + return {ErrorCodes::InvalidOptions, + str::stream() << "A {multi:false} update on a sharded collection must either " + "contain an exact match on _id (and have the collection " + "default collation) or must target a single shard (and have " + "the simple collation), but this update targeted " + << shardEndPoints.getValue().size() + << " shards. Update request: " + << updateDoc.toBSON() + << ", shard key pattern: " + << shardKeyPattern.toString()}; } } - // We failed to target a single shard. - - // Upserts are required to target a single shard. - if (_routingInfo->cm() && updateDoc.getUpsert()) { - return Status(ErrorCodes::ShardKeyNotFound, - str::stream() << "An upsert on a sharded collection must contain the shard " - "key and have the simple collation. Update request: " - << updateDoc.toBSON() - << ", shard key pattern: " - << _routingInfo->cm()->getShardKeyPattern().toString()); - } - - // Parse update query. - auto qr = stdx::make_unique<QueryRequest>(getNS()); - qr->setFilter(updateDoc.getQ()); - if (!collation.isEmpty()) { - qr->setCollation(collation); - } - // $expr is not allowed in the query for an upsert, since it is not clear what the equality - // extraction behavior for $expr should be. - auto allowedMatcherFeatures = MatchExpressionParser::kAllowAllSpecialFeatures; - if (updateDoc.getUpsert()) { - allowedMatcherFeatures &= ~MatchExpressionParser::AllowedFeatures::kExpr; - } - const boost::intrusive_ptr<ExpressionContext> expCtx; - auto cq = CanonicalQuery::canonicalize( - opCtx, std::move(qr), expCtx, ExtensionsCallbackNoop(), allowedMatcherFeatures); - if (!cq.isOK() && cq.getStatus().code() == ErrorCodes::QueryFeatureNotAllowed) { - // The default error message for disallowed $expr is not descriptive enough, so we rewrite - // it here. - return {ErrorCodes::QueryFeatureNotAllowed, - "$expr is not allowed in the query predicate for an upsert"}; - } - if (!cq.isOK()) { - return cq.getStatus().withContext(str::stream() << "Could not parse update query " - << updateDoc.getQ()); - } - - // Single (non-multi) updates must target a single shard or be exact-ID. - if (_routingInfo->cm() && !updateDoc.getMulti() && - !isExactIdQuery(opCtx, *cq.getValue(), _routingInfo->cm().get())) { - return {ErrorCodes::ShardKeyNotFound, - str::stream() << "A single update on a sharded collection must contain an exact " - "match on _id (and have the collection default collation) or " - "contain the shard key (and have the simple collation). Update " - "request: " - << updateDoc.toBSON() - << ", shard key pattern: " - << _routingInfo->cm()->getShardKeyPattern().toString()}; - } - - if (updateType == UpdateType_OpStyle) { - return _targetQuery(opCtx, query, collation); - } else { - return _targetDoc(opCtx, updateExpr, collation); - } + return shardEndPoints; } StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetDelete( @@ -518,6 +536,58 @@ StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::targetDelete( return _targetQuery(opCtx, deleteDoc.getQ(), collation); } +StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::_targetUpdateByShardKey( + OperationContext* opCtx, + const UpdateType updateType, + const BSONObj query, + const BSONObj collation, + const BSONObj updateExpr, + const bool isUpsert) const { + // This method should only ever be called on a sharded collection with a valid updateType. + invariant(updateType != UpdateType::kUnknown); + invariant(_routingInfo->cm()); + + const auto& shardKeyPattern = _routingInfo->cm()->getShardKeyPattern(); + + // Attempt to extract the shard key from the query (for an op-style update) or the update + // expression document (for a replacement-style update). + const auto shardKey = + (updateType == UpdateType::kOpStyle ? shardKeyPattern.extractShardKeyFromQuery(opCtx, query) + : shardKeyPattern.extractShardKeyFromDoc(updateExpr)); + if (!shardKey.isOK()) { + return shardKey.getStatus(); + } + + // Attempt to dispatch the update by routing on the extracted shard key. + if (!shardKey.getValue().isEmpty()) { + // Verify that the shard key does not exceed the maximum permitted size. + const auto shardKeySizeStatus = ShardKeyPattern::checkShardKeySize(shardKey.getValue()); + if (!shardKeySizeStatus.isOK()) { + return shardKeySizeStatus; + } + try { + const long long estimatedDataSize = query.objsize() + updateExpr.objsize(); + return std::vector<ShardEndpoint>{ + _targetShardKey(shardKey.getValue(), collation, estimatedDataSize)}; + } catch (const DBException& ex) { + // The update is potentially not constrained to a single shard. If this is an upsert, + // then we do not return the error here; we provide a more descriptive message below. + if (!isUpsert) + return ex.toStatus(); + } + } + return { + ErrorCodes::ShardKeyNotFound, + str::stream() + << (isUpsert + ? "Sharded upserts must contain the shard key and have the simple collation. " + : "Could not extract an exact shard key value. ") + << "Update request: " + << BSON("q" << query << "u" << updateExpr) + << ", shard key pattern: " + << shardKeyPattern.toString()}; +} + StatusWith<std::vector<ShardEndpoint>> ChunkManagerTargeter::_targetDoc( OperationContext* opCtx, const BSONObj& doc, const BSONObj& collation) const { // NOTE: This is weird and fragile, but it's the way our language works right now - documents diff --git a/src/mongo/s/write_ops/chunk_manager_targeter.h b/src/mongo/s/write_ops/chunk_manager_targeter.h index 6de41ed3fe7..9575d2460f7 100644 --- a/src/mongo/s/write_ops/chunk_manager_targeter.h +++ b/src/mongo/s/write_ops/chunk_manager_targeter.h @@ -54,6 +54,8 @@ struct TargeterStats { */ class ChunkManagerTargeter : public NSTargeter { public: + enum class UpdateType { kReplacement, kOpStyle, kUnknown }; + ChunkManagerTargeter(const NamespaceString& nss, TargeterStats* stats); /** @@ -106,6 +108,19 @@ private: Status _refreshNow(OperationContext* opCtx); /** + * Attempts to route an update operation by extracting an exact shard key from the given query + * and/or update expression. Should only be called on sharded collections, and with a valid + * updateType. Returns not-OK if the user's query is invalid, if the extracted shard key exceeds + * the maximum allowed length, or if the update could not be targeted by exact shard key. + */ + StatusWith<std::vector<ShardEndpoint>> _targetUpdateByShardKey(OperationContext* opCtx, + const UpdateType updateType, + const BSONObj query, + const BSONObj collation, + const BSONObj updateExpr, + const bool isUpsert) const; + + /** * Returns a vector of ShardEndpoints where a document might need to be placed. * * Returns !OK with message if replacement could not be targeted @@ -135,7 +150,7 @@ private: * * If 'collation' is empty, we use the collection default collation for targeting. */ - ShardEndpoint _targetShardKey(const BSONObj& doc, + ShardEndpoint _targetShardKey(const BSONObj& shardKey, const BSONObj& collation, long long estDataSize) const; |