summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorWilliam Schultz <william.schultz@mongodb.com>2017-08-16 16:59:57 -0400
committerWilliam Schultz <william.schultz@mongodb.com>2017-08-16 17:00:18 -0400
commitba40a3fff428f881afc465e125df03af154df845 (patch)
tree933ad2daab77c9178b9059a57a76145312ff0c4d /src
parentfe72cc35ff8af7bf421d29c668e4d50c048d141b (diff)
downloadmongo-ba40a3fff428f881afc465e125df03af154df845.tar.gz
SERVER-29898 Call recoverToStableTimestamp under the global lock in rollback
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/repl/bgsync.cpp4
-rw-r--r--src/mongo/db/repl/rollback_impl.cpp23
-rw-r--r--src/mongo/db/repl/rollback_impl.h7
-rw-r--r--src/mongo/db/repl/rollback_impl_test.cpp111
-rw-r--r--src/mongo/db/repl/storage_interface.h11
-rw-r--r--src/mongo/db/repl/storage_interface_impl.cpp4
-rw-r--r--src/mongo/db/repl/storage_interface_impl.h2
-rw-r--r--src/mongo/db/repl/storage_interface_mock.h4
8 files changed, 161 insertions, 5 deletions
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index da1655c99d2..bda25830c4f 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -730,8 +730,8 @@ void BackgroundSync::_runRollbackViaRecoverToCheckpoint(
}
}
- _rollback =
- stdx::make_unique<RollbackImpl>(localOplog, &remoteOplog, _replicationProcess, _replCoord);
+ _rollback = stdx::make_unique<RollbackImpl>(
+ localOplog, &remoteOplog, storageInterface, _replicationProcess, _replCoord);
log() << "Scheduling rollback (sync source: " << source << ")";
auto status = _rollback->runRollback(opCtx);
diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp
index 80b958fa049..8e55421101b 100644
--- a/src/mongo/db/repl/rollback_impl.cpp
+++ b/src/mongo/db/repl/rollback_impl.cpp
@@ -37,6 +37,7 @@
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/replication_process.h"
#include "mongo/db/repl/roll_back_local_operations.h"
+#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/s/shard_identity_rollback_notifier.h"
#include "mongo/db/session_catalog.h"
#include "mongo/util/log.h"
@@ -47,17 +48,20 @@ namespace repl {
RollbackImpl::RollbackImpl(OplogInterface* localOplog,
OplogInterface* remoteOplog,
+ StorageInterface* storageInterface,
ReplicationProcess* replicationProcess,
ReplicationCoordinator* replicationCoordinator,
Listener* listener)
: _localOplog(localOplog),
_remoteOplog(remoteOplog),
+ _storageInterface(storageInterface),
_replicationProcess(replicationProcess),
_replicationCoordinator(replicationCoordinator),
_listener(listener) {
invariant(localOplog);
invariant(remoteOplog);
+ invariant(storageInterface);
invariant(replicationProcess);
invariant(replicationCoordinator);
invariant(listener);
@@ -65,9 +69,15 @@ RollbackImpl::RollbackImpl(OplogInterface* localOplog,
RollbackImpl::RollbackImpl(OplogInterface* localOplog,
OplogInterface* remoteOplog,
+ StorageInterface* storageInterface,
ReplicationProcess* replicationProcess,
ReplicationCoordinator* replicationCoordinator)
- : RollbackImpl(localOplog, remoteOplog, replicationProcess, replicationCoordinator, {}) {}
+ : RollbackImpl(localOplog,
+ remoteOplog,
+ storageInterface,
+ replicationProcess,
+ replicationCoordinator,
+ {}) {}
RollbackImpl::~RollbackImpl() {
shutdown();
@@ -104,6 +114,17 @@ Status RollbackImpl::runRollback(OperationContext* opCtx) {
return status;
}
+ // Recover to the stable timestamp while holding the global exclusive lock.
+ auto storageEngine = opCtx->getServiceContext()->getGlobalStorageEngine();
+ {
+ Lock::GlobalWrite globalWrite(opCtx);
+ status = _storageInterface->recoverToStableTimestamp(storageEngine);
+ if (!status.isOK()) {
+ return status;
+ }
+ }
+
+
// At this point these functions need to always be called before returning, even on failure.
// These functions fassert on failure.
ON_BLOCK_EXIT([this, opCtx] {
diff --git a/src/mongo/db/repl/rollback_impl.h b/src/mongo/db/repl/rollback_impl.h
index 0753195ea0a..46f4111ec95 100644
--- a/src/mongo/db/repl/rollback_impl.h
+++ b/src/mongo/db/repl/rollback_impl.h
@@ -30,6 +30,7 @@
#include "mongo/base/status_with.h"
#include "mongo/db/repl/rollback.h"
+#include "mongo/db/repl/storage_interface.h"
#include "mongo/stdx/functional.h"
namespace mongo {
@@ -111,6 +112,7 @@ public:
*/
RollbackImpl(OplogInterface* localOplog,
OplogInterface* remoteOplog,
+ StorageInterface* storageInterface,
ReplicationProcess* replicationProcess,
ReplicationCoordinator* replicationCoordinator,
Listener* listener);
@@ -120,6 +122,7 @@ public:
*/
RollbackImpl(OplogInterface* localOplog,
OplogInterface* remoteOplog,
+ StorageInterface* storageInterface,
ReplicationProcess* replicationProcess,
ReplicationCoordinator* replicationCoordinator);
@@ -199,6 +202,10 @@ private:
// This is used to read oplog entries from the remote oplog to find the common point.
OplogInterface* const _remoteOplog; // (R)
+ // The StorageInterface associated with this Rollback instance. Used to execute operations
+ // at the storage layer e.g. recovering to a timestamp.
+ StorageInterface* _storageInterface; // (R)
+
// The ReplicationProcess associated with this Rollback instance. Used to update and persist
// various pieces of replication state related to the rollback process.
ReplicationProcess* _replicationProcess; // (R)
diff --git a/src/mongo/db/repl/rollback_impl_test.cpp b/src/mongo/db/repl/rollback_impl_test.cpp
index 4e409b437fa..0c5aa79d773 100644
--- a/src/mongo/db/repl/rollback_impl_test.cpp
+++ b/src/mongo/db/repl/rollback_impl_test.cpp
@@ -36,6 +36,7 @@
#include "mongo/unittest/death_test.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/uuid.h"
+#include <boost/optional.hpp>
namespace {
@@ -44,6 +45,58 @@ using namespace mongo::repl;
NamespaceString nss("local.oplog.rs");
+class StorageInterfaceRollback : public StorageInterfaceImpl {
+public:
+ void setStableTimestamp(StorageEngine* storageEngine, SnapshotName snapshotName) override {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ _stableTimestamp = Timestamp(snapshotName.asU64());
+ }
+
+ /**
+ * If '_recoverToTimestampStatus' is non-empty, returns it. If '_recoverToTimestampStatus' is
+ * empty, updates '_currTimestamp' to be equal to '_stableTimestamp' and returns an OK status.
+ */
+ Status recoverToStableTimestamp(StorageEngine* storageEngine) override {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ if (_recoverToTimestampStatus) {
+ return _recoverToTimestampStatus.get();
+ } else {
+ _currTimestamp = _stableTimestamp;
+ return Status::OK();
+ }
+ }
+
+ void setRecoverToTimestampStatus(Status status) {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ _recoverToTimestampStatus = status;
+ }
+
+ void setCurrentTimestamp(Timestamp ts) {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ _currTimestamp = ts;
+ }
+
+ Timestamp getCurrentTimestamp() {
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ return _currTimestamp;
+ }
+
+private:
+ mutable stdx::mutex _mutex;
+
+ Timestamp _stableTimestamp;
+
+ // Used to mock the behavior of 'recoverToStableTimestamp'. Upon calling
+ // 'recoverToStableTimestamp', the 'currTimestamp' should be set to the current
+ // '_stableTimestamp' value. Can be viewed as mock version of replication's 'lastApplied'
+ // optime.
+ Timestamp _currTimestamp;
+
+ // A Status value which, if set, will be returned by the 'recoverToStableTimestamp' function, in
+ // order to simulate the error case for that function. Defaults to boost::none.
+ boost::optional<Status> _recoverToTimestampStatus = boost::none;
+};
+
/**
* Unit test for rollback implementation introduced in 3.6.
*/
@@ -61,6 +114,7 @@ private:
friend class RollbackImplTest::Listener;
protected:
+ std::unique_ptr<StorageInterfaceRollback> _storageInterface;
std::unique_ptr<OplogInterfaceMock> _localOplog;
std::unique_ptr<OplogInterfaceMock> _remoteOplog;
std::unique_ptr<RollbackImpl> _rollback;
@@ -77,11 +131,16 @@ protected:
void RollbackImplTest::setUp() {
RollbackTest::setUp();
+
+ // Set up test-specific storage interface.
+ _storageInterface = stdx::make_unique<StorageInterfaceRollback>();
+
_localOplog = stdx::make_unique<OplogInterfaceMock>();
_remoteOplog = stdx::make_unique<OplogInterfaceMock>();
_listener = stdx::make_unique<Listener>(this);
_rollback = stdx::make_unique<RollbackImpl>(_localOplog.get(),
_remoteOplog.get(),
+ _storageInterface.get(),
_replicationProcess.get(),
_coordinator,
_listener.get());
@@ -201,14 +260,62 @@ TEST_F(RollbackImplTest, RollbackIncrementsRollbackID) {
ASSERT_EQUALS(initRollbackId + 1, newRollbackId);
}
+TEST_F(RollbackImplTest, RollbackCallsRecoverToStableTimestamp) {
+ auto op = makeOpAndRecordId(1);
+ _remoteOplog->setOperations({op});
+ _localOplog->setOperations({op});
+
+ auto stableTimestamp = Timestamp(10, 0);
+ auto currTimestamp = Timestamp(20, 0);
+
+ _storageInterface->setStableTimestamp(nullptr, SnapshotName(stableTimestamp));
+ _storageInterface->setCurrentTimestamp(currTimestamp);
+
+ // Check the current timestamp.
+ ASSERT_EQUALS(currTimestamp, _storageInterface->getCurrentTimestamp());
+
+ // Run rollback.
+ ASSERT_OK(_rollback->runRollback(_opCtx.get()));
+
+ // Make sure "recover to timestamp" occurred by checking that the current timestamp was set back
+ // to the stable timestamp.
+ ASSERT_EQUALS(stableTimestamp, _storageInterface->getCurrentTimestamp());
+}
+
+TEST_F(RollbackImplTest, RollbackReturnsBadStatusIfRecoverToStableTimestampFails) {
+ auto op = makeOpAndRecordId(1);
+ _remoteOplog->setOperations({op});
+ _localOplog->setOperations({op});
+
+ auto stableTimestamp = Timestamp(10, 0);
+ auto currTimestamp = Timestamp(20, 0);
+ _storageInterface->setStableTimestamp(nullptr, SnapshotName(stableTimestamp));
+ _storageInterface->setCurrentTimestamp(currTimestamp);
+
+ // Make it so that the 'recoverToStableTimestamp' method will fail.
+ auto recoverToTimestampStatus =
+ Status(ErrorCodes::InternalError, "recoverToStableTimestamp failed.");
+ _storageInterface->setRecoverToTimestampStatus(recoverToTimestampStatus);
+
+ // Check the current timestamp.
+ ASSERT_EQUALS(currTimestamp, _storageInterface->getCurrentTimestamp());
+
+ // Run rollback.
+ auto rollbackStatus = _rollback->runRollback(_opCtx.get());
+
+ // Make sure rollback failed, and didn't execute the recover to timestamp logic.
+ ASSERT_EQUALS(recoverToTimestampStatus, rollbackStatus);
+ ASSERT_EQUALS(currTimestamp, _storageInterface->getCurrentTimestamp());
+}
+
TEST_F(RollbackImplTest, RollbackReturnsBadStatusIfIncrementRollbackIDFails) {
auto op = makeOpAndRecordId(1);
_remoteOplog->setOperations({op});
_localOplog->setOperations({op});
// Delete the rollback id collection.
- auto rollbackIdNss = NamespaceString(_storageInterface.kDefaultRollbackIdNamespace);
- ASSERT_OK(_storageInterface.dropCollection(_opCtx.get(), rollbackIdNss));
+ auto rollbackIdNss = NamespaceString(_storageInterface->kDefaultRollbackIdNamespace);
+ ASSERT_OK(_storageInterface->dropCollection(_opCtx.get(), rollbackIdNss));
// Run rollback.
auto status = _rollback->runRollback(_opCtx.get());
diff --git a/src/mongo/db/repl/storage_interface.h b/src/mongo/db/repl/storage_interface.h
index 93c46d691f8..8e37194b7a7 100644
--- a/src/mongo/db/repl/storage_interface.h
+++ b/src/mongo/db/repl/storage_interface.h
@@ -288,6 +288,17 @@ public:
*/
virtual void setInitialDataTimestamp(StorageEngine* storageEngine,
SnapshotName snapshotName) = 0;
+
+ /**
+ * Reverts the state of all database data to the last stable timestamp.
+ *
+ * The "local" database is exempt and none of its state should be reverted except for
+ * "local.replset.minvalid" and "local.replset.checkpointTimestamp" which should be reverted to
+ * the last stable timestamp.
+ *
+ * The 'stable' timestamp is set by calling StorageInterface::setStableTimestamp.
+ */
+ virtual Status recoverToStableTimestamp(StorageEngine* storageEngine) = 0;
};
} // namespace repl
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index f532e5aa361..1c5fe3f719f 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -904,6 +904,10 @@ void StorageInterfaceImpl::setInitialDataTimestamp(StorageEngine* storageEngine,
storageEngine->setInitialDataTimestamp(snapshotName);
}
+Status StorageInterfaceImpl::recoverToStableTimestamp(StorageEngine* storageEngine) {
+ return storageEngine->recoverToStableTimestamp();
+}
+
Status StorageInterfaceImpl::isAdminDbValid(OperationContext* opCtx) {
AutoGetDb autoDB(opCtx, "admin", MODE_X);
auto adminDb = autoDB.getDb();
diff --git a/src/mongo/db/repl/storage_interface_impl.h b/src/mongo/db/repl/storage_interface_impl.h
index 109db947c38..49062980b69 100644
--- a/src/mongo/db/repl/storage_interface_impl.h
+++ b/src/mongo/db/repl/storage_interface_impl.h
@@ -138,6 +138,8 @@ public:
void setInitialDataTimestamp(StorageEngine* storageEngine, SnapshotName snapshotName) override;
+ Status recoverToStableTimestamp(StorageEngine* storageEngine) override;
+
/**
* Checks that the "admin" database contains a supported version of the auth data schema.
*/
diff --git a/src/mongo/db/repl/storage_interface_mock.h b/src/mongo/db/repl/storage_interface_mock.h
index 32eeef077e7..35f813e2f63 100644
--- a/src/mongo/db/repl/storage_interface_mock.h
+++ b/src/mongo/db/repl/storage_interface_mock.h
@@ -252,6 +252,10 @@ public:
SnapshotName getInitialDataTimestamp() const;
+ Status recoverToStableTimestamp(StorageEngine* storageEngine) override {
+ return Status{ErrorCodes::IllegalOperation, "recoverToStableTimestamp not implemented."};
+ }
+
Status isAdminDbValid(OperationContext* opCtx) override {
return isAdminDbValidFn(opCtx);
};