diff options
author | Siyuan Zhou <visualzhou@gmail.com> | 2016-07-28 21:25:02 -0400 |
---|---|---|
committer | Siyuan Zhou <visualzhou@gmail.com> | 2016-07-29 17:23:30 -0400 |
commit | 350efef02455cae900d9d781c8dadb7911fee47d (patch) | |
tree | b981623998162f25ace4804771a3c0c2b0aec45c | |
parent | d57ebc808dcafef159c7d56b738854816df29d1c (diff) | |
download | mongo-350efef02455cae900d9d781c8dadb7911fee47d.tar.gz |
SERVER-24630 Mongos erroneously advances config optime for writes that fail write concern.
Use getLastOpCommitted rather than getLastOpVisible in sharding.
-rw-r--r-- | src/mongo/s/catalog/replset/catalog_manager_replica_set_test.cpp | 20 | ||||
-rw-r--r-- | src/mongo/s/client/shard_registry.cpp | 12 | ||||
-rw-r--r-- | src/mongo/s/client/shard_registry.h | 1 |
3 files changed, 19 insertions, 14 deletions
diff --git a/src/mongo/s/catalog/replset/catalog_manager_replica_set_test.cpp b/src/mongo/s/catalog/replset/catalog_manager_replica_set_test.cpp index d62025ef97b..0cb3870935b 100644 --- a/src/mongo/s/catalog/replset/catalog_manager_replica_set_test.cpp +++ b/src/mongo/s/catalog/replset/catalog_manager_replica_set_test.cpp @@ -117,7 +117,7 @@ TEST_F(CatalogManagerReplSetTest, GetCollectionExisting) { checkReadConcern(request.cmdObj, Timestamp(0, 0), repl::OpTime::kUninitializedTerm); - ReplSetMetadata metadata(10, OpTime(), newOpTime, 100, OID(), 30, -1); + ReplSetMetadata metadata(10, newOpTime, newOpTime, 100, OID(), 30, -1); BSONObjBuilder builder; metadata.writeToMetadata(&builder); @@ -179,7 +179,7 @@ TEST_F(CatalogManagerReplSetTest, GetDatabaseExisting) { checkReadConcern(request.cmdObj, Timestamp(0, 0), repl::OpTime::kUninitializedTerm); - ReplSetMetadata metadata(10, OpTime(), newOpTime, 100, OID(), 30, -1); + ReplSetMetadata metadata(10, newOpTime, newOpTime, 100, OID(), 30, -1); BSONObjBuilder builder; metadata.writeToMetadata(&builder); @@ -505,7 +505,7 @@ TEST_F(CatalogManagerReplSetTest, GetChunksForNSWithSortAndLimit) { checkReadConcern(request.cmdObj, Timestamp(0, 0), repl::OpTime::kUninitializedTerm); - ReplSetMetadata metadata(10, OpTime(), newOpTime, 100, OID(), 30, -1); + ReplSetMetadata metadata(10, newOpTime, newOpTime, 100, OID(), 30, -1); BSONObjBuilder builder; metadata.writeToMetadata(&builder); @@ -1017,7 +1017,7 @@ TEST_F(CatalogManagerReplSetTest, GetCollectionsValidResultsNoDb) { checkReadConcern(request.cmdObj, Timestamp(0, 0), repl::OpTime::kUninitializedTerm); - ReplSetMetadata metadata(10, OpTime(), newOpTime, 100, OID(), 30, -1); + ReplSetMetadata metadata(10, newOpTime, newOpTime, 100, OID(), 30, -1); BSONObjBuilder builder; metadata.writeToMetadata(&builder); @@ -2343,7 +2343,7 @@ TEST_F(CatalogManagerReplSetTest, BasicReadAfterOpTime) { ASSERT_EQ(string("dummy"), request.cmdObj.firstElementFieldName()); checkReadConcern(request.cmdObj, lastOpTime.getTimestamp(), lastOpTime.getTerm()); - ReplSetMetadata metadata(10, repl::OpTime(), newOpTime, 100, OID(), 30, -1); + ReplSetMetadata metadata(10, newOpTime, newOpTime, 100, OID(), 30, -1); BSONObjBuilder builder; metadata.writeToMetadata(&builder); @@ -2378,7 +2378,7 @@ TEST_F(CatalogManagerReplSetTest, ReadAfterOpTimeShouldNotGoBack) { ASSERT_EQ(string("dummy"), request.cmdObj.firstElementFieldName()); checkReadConcern(request.cmdObj, highestOpTime.getTimestamp(), highestOpTime.getTerm()); - ReplSetMetadata metadata(10, repl::OpTime(), newOpTime, 100, OID(), 30, -1); + ReplSetMetadata metadata(10, newOpTime, newOpTime, 100, OID(), 30, -1); BSONObjBuilder builder; metadata.writeToMetadata(&builder); @@ -2406,7 +2406,7 @@ TEST_F(CatalogManagerReplSetTest, ReadAfterOpTimeShouldNotGoBack) { ASSERT_EQ(string("dummy"), request.cmdObj.firstElementFieldName()); checkReadConcern(request.cmdObj, highestOpTime.getTimestamp(), highestOpTime.getTerm()); - ReplSetMetadata metadata(10, repl::OpTime(), oldOpTime, 100, OID(), 30, -1); + ReplSetMetadata metadata(10, oldOpTime, oldOpTime, 100, OID(), 30, -1); BSONObjBuilder builder; metadata.writeToMetadata(&builder); @@ -2430,7 +2430,7 @@ TEST_F(CatalogManagerReplSetTest, ReadAfterOpTimeShouldNotGoBack) { ASSERT_EQ(string("dummy"), request.cmdObj.firstElementFieldName()); checkReadConcern(request.cmdObj, highestOpTime.getTimestamp(), highestOpTime.getTerm()); - ReplSetMetadata metadata(10, repl::OpTime(), oldOpTime, 100, OID(), 30, -1); + ReplSetMetadata metadata(10, oldOpTime, oldOpTime, 100, OID(), 30, -1); BSONObjBuilder builder; metadata.writeToMetadata(&builder); @@ -2455,7 +2455,7 @@ TEST_F(CatalogManagerReplSetTest, ReadAfterOpTimeFindThenCmd) { ASSERT_EQUALS(kReplSecondaryOkMetadata, request.metadata); checkReadConcern(request.cmdObj, highestOpTime.getTimestamp(), highestOpTime.getTerm()); - ReplSetMetadata metadata(10, repl::OpTime(), newOpTime, 100, OID(), 30, -1); + ReplSetMetadata metadata(10, newOpTime, newOpTime, 100, OID(), 30, -1); BSONObjBuilder builder; metadata.writeToMetadata(&builder); @@ -2514,7 +2514,7 @@ TEST_F(CatalogManagerReplSetTest, ReadAfterOpTimeCmdThenFind) { ASSERT_EQ(string("dummy"), request.cmdObj.firstElementFieldName()); checkReadConcern(request.cmdObj, highestOpTime.getTimestamp(), highestOpTime.getTerm()); - ReplSetMetadata metadata(10, repl::OpTime(), newOpTime, 100, OID(), 30, -1); + ReplSetMetadata metadata(10, newOpTime, newOpTime, 100, OID(), 30, -1); BSONObjBuilder builder; metadata.writeToMetadata(&builder); diff --git a/src/mongo/s/client/shard_registry.cpp b/src/mongo/s/client/shard_registry.cpp index 739348edde5..9287d7daf8a 100644 --- a/src/mongo/s/client/shard_registry.cpp +++ b/src/mongo/s/client/shard_registry.cpp @@ -556,7 +556,7 @@ StatusWith<ShardRegistry::QueryResponse> ShardRegistry::_exhaustiveFindOnConfig( return; } - response.opTime = replParseStatus.getValue().getLastOpVisible(); + response.opTime = replParseStatus.getValue().getLastOpCommitted(); advanceConfigOpTime(response.opTime); } @@ -837,11 +837,17 @@ StatusWith<ShardRegistry::CommandResponse> ShardRegistry::_runCommandWithMetadat return replParseStatus.getStatus(); } + // Use the last committed optime to advance config optime. + // For successful majority writes, we could use the optime of the last op + // from us and lastOpCommitted is always greater than or equal to it. + // On majority write failures, the last visible optime would be incorrect + // due to rollback as explained in SERVER-24630 and the last committed optime + // is safe to use. const auto& replMetadata = replParseStatus.getValue(); - cmdResponse.visibleOpTime = replMetadata.getLastOpVisible(); + const OpTime lastCommittedOpTime = replMetadata.getLastOpCommitted(); if (shard->isConfig()) { - advanceConfigOpTime(cmdResponse.visibleOpTime); + advanceConfigOpTime(lastCommittedOpTime); } } diff --git a/src/mongo/s/client/shard_registry.h b/src/mongo/s/client/shard_registry.h index 3cfcfc0dac6..3b3ce2ce902 100644 --- a/src/mongo/s/client/shard_registry.h +++ b/src/mongo/s/client/shard_registry.h @@ -333,7 +333,6 @@ private: struct CommandResponse { BSONObj response; BSONObj metadata; - repl::OpTime visibleOpTime; }; /** |