diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2020-03-02 05:29:15 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-03-02 11:10:34 +0000 |
commit | 60b4902efe05c84d938f6ac600f406d53de0eb6f (patch) | |
tree | 2491df3e1a2addf17acef360771a9d2f6d558a7f | |
parent | 8c52aff50122fd92dd3d8216b970cb00bd4a6345 (diff) | |
download | mongo-60b4902efe05c84d938f6ac600f406d53de0eb6f.tar.gz |
SERVER-44598 Shards do not treat the IGNORED version as "intended sharded"
(cherry picked from commit 7604dad6da718751ad1f04bae1c839e87a1f8651)
(cherry picked from commit 5868a00ce71d85b6063489909b5ae79bc7369338)
-rw-r--r-- | buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml | 4 | ||||
-rw-r--r-- | jstests/readonly/write_ops.js | 11 | ||||
-rw-r--r-- | jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js | 38 | ||||
-rw-r--r-- | jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js | 40 | ||||
-rw-r--r-- | src/mongo/db/s/collection_sharding_state.cpp | 26 | ||||
-rw-r--r-- | src/mongo/s/chunk_version.cpp | 16 | ||||
-rw-r--r-- | src/mongo/s/chunk_version.h | 22 | ||||
-rw-r--r-- | src/mongo/s/write_ops/write_op.cpp | 1 |
8 files changed, 127 insertions, 31 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml index 2171d2d739e..ff964dd230e 100644 --- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml +++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml @@ -116,8 +116,10 @@ selector: - jstests/sharding/explain_cmd.js # Enable when SERVER-44733 is backported - jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js - # Enable after SERVER-44477 is backported and available on older binaries. + # Disabled due to missing functionality - jstests/sharding/mr_merge_to_existing.js + # Disabled due to missing functionality + - jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js executor: config: shell_options: diff --git a/jstests/readonly/write_ops.js b/jstests/readonly/write_ops.js index a5d43077cbc..03740041abc 100644 --- a/jstests/readonly/write_ops.js +++ b/jstests/readonly/write_ops.js @@ -1,5 +1,8 @@ load("jstests/readonly/lib/read_only_test.js"); - +/* + * TODO SERVER-45483: The write command should only be returning an + * IlegalOperation + */ runReadOnlyTest(function() { 'use strict'; return { @@ -11,19 +14,19 @@ runReadOnlyTest(function() { // Test that insert fails. assert.writeErrorWithCode( readableCollection.insert({x: 2}), - ErrorCodes.IllegalOperation, + [ErrorCodes.IllegalOperation, ErrorCodes.MultipleErrorsOccurred], "Expected insert to fail because database is in read-only mode"); // Test that delete fails. assert.writeErrorWithCode( readableCollection.remove({x: 1}), - ErrorCodes.IllegalOperation, + [ErrorCodes.IllegalOperation, ErrorCodes.MultipleErrorsOccurred], "Expected remove to fail because database is in read-only mode"); // Test that update fails. assert.writeErrorWithCode( readableCollection.update({_id: 0}, {$inc: {x: 1}}), - ErrorCodes.IllegalOperation, + [ErrorCodes.IllegalOperation, ErrorCodes.MultipleErrorsOccurred], "Expected update to fail because database is in read-only mode"); } }; diff --git a/jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js b/jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js index 7e35b201438..7b37c80d0bf 100644 --- a/jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js +++ b/jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js @@ -1,8 +1,7 @@ /** - * Tests that an updateLookup change stream throws ChangeStreamFatalError when it encounters an - * oplog entry whose documentKey omits the shard key. - * TODO SERVER-44598: the oplog entry will no longer omit the shard key when SERVER-44598 is fixed, - * and so this test will no longer be relevant. + * Tests that an updateLookup change stream doesn't throw ChangeStreamFatalError after + * fixing SERVER-44598 + * * @tags: [uses_change_streams] */ (function() { @@ -39,12 +38,26 @@ const resumeToken = csCursor.next()._id; + // get any secondary + const newPrimary = st.rs0.getSecondary(); + let shards = st.s.getDB('config').shards.find().toArray(); + // Step up one of the Secondaries, which will not have any sharding metadata loaded. - shard0.stepUpNoAwaitReplication(shard0.getSecondary()); + st.rs0.stepUpNoAwaitReplication(newPrimary); + + // make sure the mongos refreshes it's connections to the shard + let primary = {}; + do { + let connPoolStats = st.s0.adminCommand({connPoolStats: 1}); + primary = connPoolStats.replicaSets[shards[0]._id].hosts.find((host) => { + return host.ismaster; + }) || + {}; + } while (newPrimary.host !== primary.addr); // Do a {multi:true} update. This will scatter to all shards and update the document on shard0. - // Because no metadata is loaded, this will write the update into the oplog with a documentKey - // containing only the _id field. + // Because no metadata is loaded, the shard will return a StaleShardVersion and fetch it, + // the operation will be retried assert.soonNoExcept(() => assert.commandWorked( mongosColl.update({_id: 0}, {$set: {updated: true}}, false, true))); @@ -54,12 +67,11 @@ cursor: {} })); - // Begin pulling from the stream. We should hit a ChangeStreamFatalError when the updateLookup - // attempts to read the update entry that is missing the shard key value of the document. - assert.soonNoExcept( - () => assert.commandFailedWithCode( - mongosColl.runCommand({getMore: cmdRes.cursor.id, collection: mongosColl.getName()}), - ErrorCodes.ChangeStreamFatalError)); + assert.soon(() => csCursor.hasNext()); + + const updateObj = csCursor.next(); + + assert.eq(true, updateObj.updateDescription.updatedFields.updated); st.stop(); })();
\ No newline at end of file diff --git a/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js b/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js new file mode 100644 index 00000000000..3947c38f048 --- /dev/null +++ b/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js @@ -0,0 +1,40 @@ +/** + * Tests that after a restart on a shard a multi-write operation will not be repeated + * + * This test requrires persistence because it asumes the shard will still have it's + * data after restarting + * + * @tags: [requires_persistence] + */ +(function() { + 'use strict'; + var st = new ShardingTest({shards: 2, mongos: 1}); + + st.enableSharding('TestDB'); + st.ensurePrimaryShard('TestDB', st.rs1.getURL()); + + // Hash shard the collection so that the chunks are placed uniformly across the shards from the + // beginning (because the collection is empty) + st.shardCollection('TestDB.TestColl', {Key: 'hashed'}, false, {numInitialChunks: 120}); + // Helper function to restart a node and wait that the entire set is operational + + // Insert a document outside the shard key range so the update below will generate a scather- + // gather write operation + st.s0.getDB('TestDB').TestColl.insert({Key: null, X: 'Key 0', inc: 0}); + + // Restart shard 0, given the fact that the ssv flag is true, then + // mongos should send the info only once + + st.restartShardRS(0); + + // Do a scather-gather write operation. One of the shards have been restarted + // so StaleShardVersion should be returned, and the multi-write should be + // executed only once per shard + var bulkOp = st.s0.getDB('TestDB').TestColl.initializeUnorderedBulkOp(); + bulkOp.find({X: 'Key 0'}).update({$inc: {inc: 1}}); + bulkOp.execute(); + + // Make sure inc have been incremented only 1 time + assert.eq(1, st.s0.getDB('TestDB').TestColl.findOne({X: 'Key 0'}).inc); + st.stop(); +})(); diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp index 8a3e0304146..a49b795832f 100644 --- a/src/mongo/db/s/collection_sharding_state.cpp +++ b/src/mongo/db/s/collection_sharding_state.cpp @@ -198,17 +198,16 @@ void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) return; const auto& receivedShardVersion = *optReceivedShardVersion; - if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) { - return; - } // An operation with read concern 'available' should never have shardVersion set. invariant(repl::ReadConcernArgs::get(opCtx).getLevel() != repl::ReadConcernLevel::kAvailableReadConcern); - const auto metadata = getCurrentMetadata(); - const auto wantedShardVersion = - metadata->isSharded() ? metadata->getShardVersion() : ChunkVersion::UNSHARDED(); + const auto metadata = _getMetadata(boost::none); + auto wantedShardVersion = ChunkVersion::UNSHARDED(); + if (metadata && (*metadata)->isSharded()) { + wantedShardVersion = (*metadata)->getShardVersion(); + } auto criticalSectionSignal = _critSec.getSignal(opCtx->lockState()->isWriteLocked() ? ShardingMigrationCriticalSection::kWrite @@ -223,10 +222,6 @@ void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) str::stream() << "migration commit in progress for " << _nss.ns()); } - if (receivedShardVersion.isWriteCompatibleWith(wantedShardVersion)) { - return; - } - // // Figure out exactly why not compatible, send appropriate error message // The versions themselves are returned in the error, so not needed in messages here @@ -234,6 +229,17 @@ void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx) StaleConfigInfo sci(_nss, receivedShardVersion, wantedShardVersion); + if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) { + uassert(std::move(sci), + "no metadata found on multi-write operation, need to refresh", + !receivedShardVersion.canThrowSSVOnIgnored() || metadata); + return; + } + + if (receivedShardVersion.isWriteCompatibleWith(wantedShardVersion)) { + return; + } + uassert(std::move(sci), str::stream() << "epoch mismatch detected for " << _nss.ns() << ", " << "the collection may have been dropped and recreated", diff --git a/src/mongo/s/chunk_version.cpp b/src/mongo/s/chunk_version.cpp index 7074472584f..ebf4249c762 100644 --- a/src/mongo/s/chunk_version.cpp +++ b/src/mongo/s/chunk_version.cpp @@ -77,6 +77,13 @@ StatusWith<ChunkVersion> ChunkVersion::parseWithField(const BSONObj& obj, String version._epoch = epochPart.OID(); } + if (it.more()) { + // Expect _canThrowSSVOnIgnored + BSONElement canThrowSSVOnIgnoredPart = it.next(); + + version._canThrowSSVOnIgnored = canThrowSSVOnIgnoredPart && canThrowSSVOnIgnoredPart.Bool(); + } + return version; } @@ -114,6 +121,9 @@ void ChunkVersion::appendWithField(BSONObjBuilder* out, StringData field) const BSONArrayBuilder arr(out->subarrayStart(field)); arr.appendTimestamp(_combined); arr.append(_epoch); + if (_canThrowSSVOnIgnored) { + arr.append(_canThrowSSVOnIgnored); + } } void ChunkVersion::appendLegacyWithField(BSONObjBuilder* out, StringData field) const { @@ -125,11 +135,15 @@ BSONObj ChunkVersion::toBSON() const { BSONArrayBuilder b; b.appendTimestamp(_combined); b.append(_epoch); + if (_canThrowSSVOnIgnored) { + b.append(_canThrowSSVOnIgnored); + } return b.arr(); } std::string ChunkVersion::toString() const { - return str::stream() << majorVersion() << "|" << minorVersion() << "||" << _epoch; + return str::stream() << majorVersion() << "|" << minorVersion() << "||" << _epoch + << (_canThrowSSVOnIgnored ? "|||canThrowSSVOnIgnored" : ""); } } // namespace mongo diff --git a/src/mongo/s/chunk_version.h b/src/mongo/s/chunk_version.h index 89ce548b032..62edd814d35 100644 --- a/src/mongo/s/chunk_version.h +++ b/src/mongo/s/chunk_version.h @@ -57,11 +57,12 @@ public: */ static constexpr StringData kShardVersionField = "shardVersion"_sd; - ChunkVersion() : _combined(0), _epoch(OID()) {} + ChunkVersion() : _combined(0), _epoch(OID()), _canThrowSSVOnIgnored(false) {} ChunkVersion(uint32_t major, uint32_t minor, const OID& epoch) : _combined(static_cast<uint64_t>(minor) | (static_cast<uint64_t>(major) << 32)), - _epoch(epoch) {} + _epoch(epoch), + _canThrowSSVOnIgnored(false) {} static StatusWith<ChunkVersion> parseFromCommand(const BSONObj& obj) { return parseWithField(obj, kShardVersionField); @@ -112,6 +113,14 @@ public: version.epoch() == IGNORED().epoch(); } + /** + * Indicates that the shard version checking must be skipped but StaleShardVersion error + * must be thrown if the metadata is not loaded + */ + void setCanThrowSSVOnIgnored() { + _canThrowSSVOnIgnored = true; + } + void incMajor() { uassert( 31180, @@ -153,6 +162,9 @@ public: return _epoch; } + bool canThrowSSVOnIgnored() const { + return _canThrowSSVOnIgnored; + } // // Explicit comparison operators - versions with epochs have non-trivial comparisons. // > < operators do not check epoch cases. Generally if using == we need to handle @@ -223,6 +235,12 @@ public: private: uint64_t _combined; OID _epoch; + + /** + * Temporary flag to indicate shards that a router is able to process and retry + * multi-write operations. + */ + bool _canThrowSSVOnIgnored; }; inline std::ostream& operator<<(std::ostream& s, const ChunkVersion& v) { diff --git a/src/mongo/s/write_ops/write_op.cpp b/src/mongo/s/write_ops/write_op.cpp index d37a4afdd2e..487756c7356 100644 --- a/src/mongo/s/write_ops/write_op.cpp +++ b/src/mongo/s/write_ops/write_op.cpp @@ -105,6 +105,7 @@ Status WriteOp::targetWrites(OperationContext* opCtx, // For now, multiple endpoints imply no versioning - we can't retry half a multi-write if (endpoints.size() > 1u) { endpoint.shardVersion = ChunkVersion::IGNORED(); + endpoint.shardVersion.setCanThrowSSVOnIgnored(); } targetedWrites->push_back(new TargetedWrite(std::move(endpoint), ref)); |