diff options
author | Benety Goh <benety@mongodb.com> | 2017-04-25 14:23:22 -0400 |
---|---|---|
committer | Benety Goh <benety@mongodb.com> | 2017-04-27 11:00:32 -0400 |
commit | 983ac10e3a1f37cc762e2d68ff4cf9e8263154e5 (patch) | |
tree | 2459b9ea2d292a99f780fb9678924ca456f3551c | |
parent | dde12497e793c8da8237ec983fd1dd1dd0b9e5dc (diff) | |
download | mongo-983ac10e3a1f37cc762e2d68ff4cf9e8263154e5.tar.gz |
SERVER-28214 BackgroundSync falls back on 3.4 algorithm if RollbackImpl returns IncompatibleRollbackAlgorithm
-rw-r--r-- | src/mongo/base/error_codes.err | 1 | ||||
-rw-r--r-- | src/mongo/db/repl/bgsync.cpp | 46 | ||||
-rw-r--r-- | src/mongo/db/repl/bgsync.h | 19 |
3 files changed, 50 insertions, 16 deletions
diff --git a/src/mongo/base/error_codes.err b/src/mongo/base/error_codes.err index b01f2b09b30..3754363d9e7 100644 --- a/src/mongo/base/error_codes.err +++ b/src/mongo/base/error_codes.err @@ -209,6 +209,7 @@ error_code("TooManyLocks", 208) error_code("StaleClusterTime", 209) error_code("CannotVerifyAndSignLogicalTime", 210) error_code("KeyNotFound", 211) +error_code("IncompatibleRollbackAlgorithm", 212) # Error codes 4000-8999 are reserved. diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index 09784b76d67..387dd04cd40 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -617,19 +617,8 @@ void BackgroundSync::_runRollback(OperationContext* opCtx, OplogInterfaceLocal localOplog(opCtx, rsOplogName); if (use3dot4Rollback) { - const int messagingPortTags = 0; - ConnectionPool connectionPool(messagingPortTags); - std::unique_ptr<ConnectionPool::ConnectionPtr> connection; - auto getConnection = [&connection, &connectionPool, source]() -> DBClientBase* { - if (!connection.get()) { - connection.reset(new ConnectionPool::ConnectionPtr( - &connectionPool, source, Date_t::now(), kRollbackOplogSocketTimeout)); - }; - return connection->get(); - }; - - RollbackSourceImpl rollbackSource(getConnection, source, rsOplogName); - rollback(opCtx, localOplog, rollbackSource, requiredRBID, _replCoord, storageInterface); + log() << "Rollback falling back on 3.4 algorithm due to startup server parameter"; + _fallBackOn3dot4Rollback(opCtx, source, requiredRBID, &localOplog, storageInterface); } else { AbstractAsyncComponent* rollback; StatusWith<OpTime> onRollbackShutdownResult = @@ -663,11 +652,16 @@ void BackgroundSync::_runRollback(OperationContext* opCtx, warning() << "Unable to schedule rollback: " << scheduleStatus; } else { rollback->join(); - if (!onRollbackShutdownResult.isOK()) { - warning() << "Rollback failed with error: " << onRollbackShutdownResult.getStatus(); - } else { + auto status = onRollbackShutdownResult.getStatus(); 
+ if (status.isOK()) { log() << "Rollback successful. Last applied optime: " << onRollbackShutdownResult.getValue(); + } else if (ErrorCodes::IncompatibleRollbackAlgorithm == status) { + log() << "Rollback falling back on 3.4 algorithm due to " << status; + _fallBackOn3dot4Rollback( + opCtx, source, requiredRBID, &localOplog, storageInterface); + } else { + warning() << "Rollback failed with error: " << status; } } } @@ -677,6 +671,26 @@ void BackgroundSync::_runRollback(OperationContext* opCtx, startProducerIfStopped(); } +void BackgroundSync::_fallBackOn3dot4Rollback(OperationContext* opCtx, + const HostAndPort& source, + int requiredRBID, + OplogInterface* localOplog, + StorageInterface* storageInterface) { + const int messagingPortTags = 0; + ConnectionPool connectionPool(messagingPortTags); + std::unique_ptr<ConnectionPool::ConnectionPtr> connection; + auto getConnection = [&connection, &connectionPool, source]() -> DBClientBase* { + if (!connection.get()) { + connection.reset(new ConnectionPool::ConnectionPtr( + &connectionPool, source, Date_t::now(), kRollbackOplogSocketTimeout)); + }; + return connection->get(); + }; + + RollbackSourceImpl rollbackSource(getConnection, source, rsOplogName); + rollback(opCtx, *localOplog, rollbackSource, requiredRBID, _replCoord, storageInterface); +} + HostAndPort BackgroundSync::getSyncTarget() const { stdx::unique_lock<stdx::mutex> lock(_mutex); return _syncSourceHost; diff --git a/src/mongo/db/repl/bgsync.h b/src/mongo/db/repl/bgsync.h index fc18a4dd6c3..e76bcc0dd83 100644 --- a/src/mongo/db/repl/bgsync.h +++ b/src/mongo/db/repl/bgsync.h @@ -52,6 +52,7 @@ class OperationContext; namespace repl { +class OplogInterface; class ReplicationCoordinator; class ReplicationCoordinatorExternalState; class StorageInterface; @@ -171,6 +172,24 @@ private: int requiredRBID, StorageInterface* storageInterface); + /** + * Executes a rollback using the 3.4 algorithm in rs_rollback.cpp. 
+ * + * We fall back on the 3.4 rollback algorithm when: + * 1) the server parameter "use3dot4Rollback" is enabled; or + * 2) the current rollback algorithm in RollbackImpl determines that it cannot handle certain + * 3.4 operations (either in the local or remote oplog) and returns an error code of + * IncompatibleRollbackAlgorithm. + * + * Must be called from _runRollback() which ensures that all the conditions for entering + * rollback have been met. + */ + void _fallBackOn3dot4Rollback(OperationContext* opCtx, + const HostAndPort& source, + int requiredRBID, + OplogInterface* localOplog, + StorageInterface* storageInterface); + // restart syncing void start(OperationContext* opCtx); |