summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcos José Grillo Ramírez <marcos.grillo@10gen.com>2020-01-27 13:08:44 +0000
committerevergreen <evergreen@mongodb.com>2020-01-27 13:08:44 +0000
commitf2eee7f4879a0dda72b281c8d1d25f2afc4c5eb7 (patch)
tree8d60ea0836cf450d1643419dd4abd51c847e2a8d
parent1a01c53df8f7c1e016c0ccbc38b77f6b3508bf65 (diff)
downloadmongo-f2eee7f4879a0dda72b281c8d1d25f2afc4c5eb7.tar.gz
SERVER-44598 Shards do not treat the IGNORED version as "intended sharded"
(cherry picked from commit 7604dad6da718751ad1f04bae1c839e87a1f8651) (cherry picked from commit 5868a00ce71d85b6063489909b5ae79bc7369338)
-rw-r--r--buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml4
-rw-r--r--jstests/readonly/write_ops.js11
-rw-r--r--jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js38
-rw-r--r--jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js40
-rw-r--r--src/mongo/db/s/collection_sharding_state.cpp26
-rw-r--r--src/mongo/s/chunk_version.cpp16
-rw-r--r--src/mongo/s/chunk_version.h22
-rw-r--r--src/mongo/s/write_ops/write_op.cpp1
8 files changed, 127 insertions, 31 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index 2171d2d739e..ff964dd230e 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -116,8 +116,10 @@ selector:
- jstests/sharding/explain_cmd.js
# Enable when SERVER-44733 is backported
- jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js
- # Enable after SERVER-44477 is backported and available on older binaries.
+ # Disabled due to missing functionality
- jstests/sharding/mr_merge_to_existing.js
+ # Disabled due to missing functionality
+ - jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js
executor:
config:
shell_options:
diff --git a/jstests/readonly/write_ops.js b/jstests/readonly/write_ops.js
index a5d43077cbc..03740041abc 100644
--- a/jstests/readonly/write_ops.js
+++ b/jstests/readonly/write_ops.js
@@ -1,5 +1,8 @@
load("jstests/readonly/lib/read_only_test.js");
-
+/*
+ * TODO SERVER-45483: The write command should only be returning an
+ * IllegalOperation error.
+ */
runReadOnlyTest(function() {
'use strict';
return {
@@ -11,19 +14,19 @@ runReadOnlyTest(function() {
// Test that insert fails.
assert.writeErrorWithCode(
readableCollection.insert({x: 2}),
- ErrorCodes.IllegalOperation,
+ [ErrorCodes.IllegalOperation, ErrorCodes.MultipleErrorsOccurred],
"Expected insert to fail because database is in read-only mode");
// Test that delete fails.
assert.writeErrorWithCode(
readableCollection.remove({x: 1}),
- ErrorCodes.IllegalOperation,
+ [ErrorCodes.IllegalOperation, ErrorCodes.MultipleErrorsOccurred],
"Expected remove to fail because database is in read-only mode");
// Test that update fails.
assert.writeErrorWithCode(
readableCollection.update({_id: 0}, {$inc: {x: 1}}),
- ErrorCodes.IllegalOperation,
+ [ErrorCodes.IllegalOperation, ErrorCodes.MultipleErrorsOccurred],
"Expected update to fail because database is in read-only mode");
}
};
diff --git a/jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js b/jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js
index 7e35b201438..7b37c80d0bf 100644
--- a/jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js
+++ b/jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js
@@ -1,8 +1,7 @@
/**
- * Tests that an updateLookup change stream throws ChangeStreamFatalError when it encounters an
- * oplog entry whose documentKey omits the shard key.
- * TODO SERVER-44598: the oplog entry will no longer omit the shard key when SERVER-44598 is fixed,
- * and so this test will no longer be relevant.
+ * Tests that an updateLookup change stream no longer throws ChangeStreamFatalError
+ * now that SERVER-44598 is fixed.
+ *
* @tags: [uses_change_streams]
*/
(function() {
@@ -39,12 +38,26 @@
const resumeToken = csCursor.next()._id;
+    // Get any secondary of the shard's replica set.
+ const newPrimary = st.rs0.getSecondary();
+ let shards = st.s.getDB('config').shards.find().toArray();
+
// Step up one of the Secondaries, which will not have any sharding metadata loaded.
- shard0.stepUpNoAwaitReplication(shard0.getSecondary());
+ st.rs0.stepUpNoAwaitReplication(newPrimary);
+
+    // Make sure the mongos refreshes its connections to the shard.
+ let primary = {};
+ do {
+ let connPoolStats = st.s0.adminCommand({connPoolStats: 1});
+ primary = connPoolStats.replicaSets[shards[0]._id].hosts.find((host) => {
+ return host.ismaster;
+ }) ||
+ {};
+ } while (newPrimary.host !== primary.addr);
// Do a {multi:true} update. This will scatter to all shards and update the document on shard0.
- // Because no metadata is loaded, this will write the update into the oplog with a documentKey
- // containing only the _id field.
+    // Because no metadata is loaded, the shard will return a StaleShardVersion error and
+    // refresh its metadata; the operation will then be retried.
assert.soonNoExcept(() => assert.commandWorked(
mongosColl.update({_id: 0}, {$set: {updated: true}}, false, true)));
@@ -54,12 +67,11 @@
cursor: {}
}));
- // Begin pulling from the stream. We should hit a ChangeStreamFatalError when the updateLookup
- // attempts to read the update entry that is missing the shard key value of the document.
- assert.soonNoExcept(
- () => assert.commandFailedWithCode(
- mongosColl.runCommand({getMore: cmdRes.cursor.id, collection: mongosColl.getName()}),
- ErrorCodes.ChangeStreamFatalError));
+ assert.soon(() => csCursor.hasNext());
+
+ const updateObj = csCursor.next();
+
+ assert.eq(true, updateObj.updateDescription.updatedFields.updated);
st.stop();
})(); \ No newline at end of file
diff --git a/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js b/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js
new file mode 100644
index 00000000000..3947c38f048
--- /dev/null
+++ b/jstests/sharding/ssv_after_restart_of_shards_and_mongos_workarround.js
@@ -0,0 +1,40 @@
+/**
+ * Tests that, after a shard restarts, a multi-write operation will not be repeated.
+ *
+ * This test requires persistence because it assumes the shard will still have its
+ * data after restarting.
+ *
+ * @tags: [requires_persistence]
+ */
+(function() {
+ 'use strict';
+ var st = new ShardingTest({shards: 2, mongos: 1});
+
+ st.enableSharding('TestDB');
+ st.ensurePrimaryShard('TestDB', st.rs1.getURL());
+
+ // Hash shard the collection so that the chunks are placed uniformly across the shards from the
+ // beginning (because the collection is empty)
+ st.shardCollection('TestDB.TestColl', {Key: 'hashed'}, false, {numInitialChunks: 120});
+ // Helper function to restart a node and wait that the entire set is operational
+
+    // Insert a document outside the shard key range so the update below will generate a scatter-
+    // gather write operation.
+ st.s0.getDB('TestDB').TestColl.insert({Key: null, X: 'Key 0', inc: 0});
+
+    // Restart shard 0. Given that the ssv flag is true, the
+    // mongos should send the info only once.
+
+ st.restartShardRS(0);
+
+    // Do a scatter-gather write operation. One of the shards has been restarted,
+ // so StaleShardVersion should be returned, and the multi-write should be
+ // executed only once per shard
+ var bulkOp = st.s0.getDB('TestDB').TestColl.initializeUnorderedBulkOp();
+ bulkOp.find({X: 'Key 0'}).update({$inc: {inc: 1}});
+ bulkOp.execute();
+
+    // Make sure inc has been incremented exactly once.
+ assert.eq(1, st.s0.getDB('TestDB').TestColl.findOne({X: 'Key 0'}).inc);
+ st.stop();
+})();
diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp
index 8a3e0304146..a49b795832f 100644
--- a/src/mongo/db/s/collection_sharding_state.cpp
+++ b/src/mongo/db/s/collection_sharding_state.cpp
@@ -198,17 +198,16 @@ void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx)
return;
const auto& receivedShardVersion = *optReceivedShardVersion;
- if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) {
- return;
- }
// An operation with read concern 'available' should never have shardVersion set.
invariant(repl::ReadConcernArgs::get(opCtx).getLevel() !=
repl::ReadConcernLevel::kAvailableReadConcern);
- const auto metadata = getCurrentMetadata();
- const auto wantedShardVersion =
- metadata->isSharded() ? metadata->getShardVersion() : ChunkVersion::UNSHARDED();
+ const auto metadata = _getMetadata(boost::none);
+ auto wantedShardVersion = ChunkVersion::UNSHARDED();
+ if (metadata && (*metadata)->isSharded()) {
+ wantedShardVersion = (*metadata)->getShardVersion();
+ }
auto criticalSectionSignal = _critSec.getSignal(opCtx->lockState()->isWriteLocked()
? ShardingMigrationCriticalSection::kWrite
@@ -223,10 +222,6 @@ void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx)
str::stream() << "migration commit in progress for " << _nss.ns());
}
- if (receivedShardVersion.isWriteCompatibleWith(wantedShardVersion)) {
- return;
- }
-
//
// Figure out exactly why not compatible, send appropriate error message
// The versions themselves are returned in the error, so not needed in messages here
@@ -234,6 +229,17 @@ void CollectionShardingState::checkShardVersionOrThrow(OperationContext* opCtx)
StaleConfigInfo sci(_nss, receivedShardVersion, wantedShardVersion);
+ if (ChunkVersion::isIgnoredVersion(receivedShardVersion)) {
+ uassert(std::move(sci),
+ "no metadata found on multi-write operation, need to refresh",
+ !receivedShardVersion.canThrowSSVOnIgnored() || metadata);
+ return;
+ }
+
+ if (receivedShardVersion.isWriteCompatibleWith(wantedShardVersion)) {
+ return;
+ }
+
uassert(std::move(sci),
str::stream() << "epoch mismatch detected for " << _nss.ns() << ", "
<< "the collection may have been dropped and recreated",
diff --git a/src/mongo/s/chunk_version.cpp b/src/mongo/s/chunk_version.cpp
index 7074472584f..ebf4249c762 100644
--- a/src/mongo/s/chunk_version.cpp
+++ b/src/mongo/s/chunk_version.cpp
@@ -77,6 +77,13 @@ StatusWith<ChunkVersion> ChunkVersion::parseWithField(const BSONObj& obj, String
version._epoch = epochPart.OID();
}
+ if (it.more()) {
+ // Expect _canThrowSSVOnIgnored
+ BSONElement canThrowSSVOnIgnoredPart = it.next();
+
+ version._canThrowSSVOnIgnored = canThrowSSVOnIgnoredPart && canThrowSSVOnIgnoredPart.Bool();
+ }
+
return version;
}
@@ -114,6 +121,9 @@ void ChunkVersion::appendWithField(BSONObjBuilder* out, StringData field) const
BSONArrayBuilder arr(out->subarrayStart(field));
arr.appendTimestamp(_combined);
arr.append(_epoch);
+ if (_canThrowSSVOnIgnored) {
+ arr.append(_canThrowSSVOnIgnored);
+ }
}
void ChunkVersion::appendLegacyWithField(BSONObjBuilder* out, StringData field) const {
@@ -125,11 +135,15 @@ BSONObj ChunkVersion::toBSON() const {
BSONArrayBuilder b;
b.appendTimestamp(_combined);
b.append(_epoch);
+ if (_canThrowSSVOnIgnored) {
+ b.append(_canThrowSSVOnIgnored);
+ }
return b.arr();
}
std::string ChunkVersion::toString() const {
- return str::stream() << majorVersion() << "|" << minorVersion() << "||" << _epoch;
+ return str::stream() << majorVersion() << "|" << minorVersion() << "||" << _epoch
+ << (_canThrowSSVOnIgnored ? "|||canThrowSSVOnIgnored" : "");
}
} // namespace mongo
diff --git a/src/mongo/s/chunk_version.h b/src/mongo/s/chunk_version.h
index 89ce548b032..62edd814d35 100644
--- a/src/mongo/s/chunk_version.h
+++ b/src/mongo/s/chunk_version.h
@@ -57,11 +57,12 @@ public:
*/
static constexpr StringData kShardVersionField = "shardVersion"_sd;
- ChunkVersion() : _combined(0), _epoch(OID()) {}
+ ChunkVersion() : _combined(0), _epoch(OID()), _canThrowSSVOnIgnored(false) {}
ChunkVersion(uint32_t major, uint32_t minor, const OID& epoch)
: _combined(static_cast<uint64_t>(minor) | (static_cast<uint64_t>(major) << 32)),
- _epoch(epoch) {}
+ _epoch(epoch),
+ _canThrowSSVOnIgnored(false) {}
static StatusWith<ChunkVersion> parseFromCommand(const BSONObj& obj) {
return parseWithField(obj, kShardVersionField);
@@ -112,6 +113,14 @@ public:
version.epoch() == IGNORED().epoch();
}
+ /**
+ * Indicates that the shard version checking must be skipped but StaleShardVersion error
+ * must be thrown if the metadata is not loaded
+ */
+ void setCanThrowSSVOnIgnored() {
+ _canThrowSSVOnIgnored = true;
+ }
+
void incMajor() {
uassert(
31180,
@@ -153,6 +162,9 @@ public:
return _epoch;
}
+ bool canThrowSSVOnIgnored() const {
+ return _canThrowSSVOnIgnored;
+ }
//
// Explicit comparison operators - versions with epochs have non-trivial comparisons.
// > < operators do not check epoch cases. Generally if using == we need to handle
@@ -223,6 +235,12 @@ public:
private:
uint64_t _combined;
OID _epoch;
+
+ /**
+     * Temporary flag that tells shards the router is able to process and retry
+     * multi-write operations.
+ */
+ bool _canThrowSSVOnIgnored;
};
inline std::ostream& operator<<(std::ostream& s, const ChunkVersion& v) {
diff --git a/src/mongo/s/write_ops/write_op.cpp b/src/mongo/s/write_ops/write_op.cpp
index d37a4afdd2e..487756c7356 100644
--- a/src/mongo/s/write_ops/write_op.cpp
+++ b/src/mongo/s/write_ops/write_op.cpp
@@ -105,6 +105,7 @@ Status WriteOp::targetWrites(OperationContext* opCtx,
// For now, multiple endpoints imply no versioning - we can't retry half a multi-write
if (endpoints.size() > 1u) {
endpoint.shardVersion = ChunkVersion::IGNORED();
+ endpoint.shardVersion.setCanThrowSSVOnIgnored();
}
targetedWrites->push_back(new TargetedWrite(std::move(endpoint), ref));