diff options
author | Jason Chan <jason.chan@10gen.com> | 2019-07-12 13:07:33 -0400 |
---|---|---|
committer | Jason Chan <jason.chan@10gen.com> | 2019-07-12 13:07:33 -0400 |
commit | a5d088eefec42927f339ff9288f9eb078d5a8686 (patch) | |
tree | 07289b582619444b36526f0b40a25672bb24cd4a | |
parent | 9a8b1da427689c62bf38a77b9d80b240d0850d37 (diff) | |
download | mongo-a5d088eefec42927f339ff9288f9eb078d5a8686.tar.gz |
SERVER-41261 Use the oplog entry after the common point to calculate rollbackTimeLimitSecs
-rw-r--r-- | src/mongo/db/repl/roll_back_local_operations.cpp | 16 | ||||
-rw-r--r-- | src/mongo/db/repl/roll_back_local_operations.h | 8 | ||||
-rw-r--r-- | src/mongo/db/repl/roll_back_local_operations_test.cpp | 28 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_impl.cpp | 44 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_impl.h | 4 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_impl.idl | 2 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_impl_test.cpp | 36 |
7 files changed, 107 insertions, 31 deletions
diff --git a/src/mongo/db/repl/roll_back_local_operations.cpp b/src/mongo/db/repl/roll_back_local_operations.cpp index 9b516691b08..09047074164 100644 --- a/src/mongo/db/repl/roll_back_local_operations.cpp +++ b/src/mongo/db/repl/roll_back_local_operations.cpp @@ -87,11 +87,15 @@ RollBackLocalOperations::RollBackLocalOperations(const OplogInterface& localOplo } RollBackLocalOperations::RollbackCommonPoint::RollbackCommonPoint(BSONObj oplogBSON, - RecordId recordId) + RecordId recordId, + BSONObj nextOplogBSON) : _recordId(std::move(recordId)) { auto oplogEntry = uassertStatusOK(repl::OplogEntry::parse(oplogBSON)); _opTime = oplogEntry.getOpTime(); _wallClockTime = oplogEntry.getWallClockTime(); + // nextOplogEntry holds the oplog entry immediately after the common point. + auto nextOplogEntry = uassertStatusOK(repl::OplogEntry::parse(nextOplogBSON)); + _firstWallClockTimeAfterCommonPoint = nextOplogEntry.getWallClockTime(); } StatusWith<RollBackLocalOperations::RollbackCommonPoint> RollBackLocalOperations::onRemoteOperation( @@ -104,6 +108,10 @@ StatusWith<RollBackLocalOperations::RollbackCommonPoint> RollBackLocalOperations _localOplogValue = result.getValue(); } + // As we iterate through the oplog in reverse, opAfterCurrentEntry holds the oplog entry + // immediately after the entry stored in _localOplogValue. + BSONObj opAfterCurrentEntry = _localOplogValue.first; + while (getTimestamp(_localOplogValue) > getTimestamp(operation)) { _scanned++; LOG(2) << "Local oplog entry to roll back: " << redact(_localOplogValue.first); @@ -124,13 +132,15 @@ StatusWith<RollBackLocalOperations::RollbackCommonPoint> RollBackLocalOperations << getTimestamp(_localOplogValue).toString() << "}"); } + opAfterCurrentEntry = _localOplogValue.first; _localOplogValue = result.getValue(); } if (getTimestamp(_localOplogValue) == getTimestamp(operation)) { _scanned++; if (getTerm(_localOplogValue) == getTerm(operation)) { - return RollbackCommonPoint(_localOplogValue.first, _localOplogValue.second); + return RollbackCommonPoint( + _localOplogValue.first, _localOplogValue.second, opAfterCurrentEntry); } // We don't need to advance the localOplogIterator here because it is guaranteed to advance @@ -178,8 +188,8 @@ StatusWith<RollBackLocalOperations::RollbackCommonPoint> syncRollBackLocalOperat RollBackLocalOperations finder(localOplog, rollbackOperation); Timestamp theirTime; while (remoteResult.isOK()) { - theirTime = remoteResult.getValue().first["ts"].timestamp(); BSONObj theirObj = remoteResult.getValue().first; + theirTime = theirObj["ts"].timestamp(); auto result = finder.onRemoteOperation(theirObj); if (result.isOK()) { return result.getValue(); diff --git a/src/mongo/db/repl/roll_back_local_operations.h b/src/mongo/db/repl/roll_back_local_operations.h index baff79544a6..986f1ebbf22 100644 --- a/src/mongo/db/repl/roll_back_local_operations.h +++ b/src/mongo/db/repl/roll_back_local_operations.h @@ -62,7 +62,7 @@ public: class RollbackCommonPoint { public: - RollbackCommonPoint(BSONObj oplogBSON, RecordId recordId); + RollbackCommonPoint(BSONObj oplogBSON, RecordId recordId, BSONObj nextOplogBSON); RecordId getRecordId() const { return _recordId; @@ -76,10 +76,16 @@ public: return _wallClockTime; } + boost::optional<Date_t> getFirstOpWallClockTimeAfterCommonPoint() { + return _firstWallClockTimeAfterCommonPoint; + } + private: RecordId _recordId; OpTime _opTime; boost::optional<Date_t> _wallClockTime; + // The wall clock time of the first operation after the common point if it exists. + boost::optional<Date_t> _firstWallClockTimeAfterCommonPoint; }; /** diff --git a/src/mongo/db/repl/roll_back_local_operations_test.cpp b/src/mongo/db/repl/roll_back_local_operations_test.cpp index 6e041181191..1f8a933b67c 100644 --- a/src/mongo/db/repl/roll_back_local_operations_test.cpp +++ b/src/mongo/db/repl/roll_back_local_operations_test.cpp @@ -58,10 +58,29 @@ BSONObj makeOp(long long seconds, long long term = 1LL) { << uuid); } +BSONObj makeOpWithWallClockTime(long count, long wallClockMillis, long long term = 1LL) { + auto uuid = unittest::assertGet(UUID::parse("b4c66a44-c1ca-4d86-8d25-12e82fa2de5b")); + return BSON("ts" << Timestamp(count, count) << "t" << term << "op" + << "n" + << "o" + << BSONObj() + << "ns" + << "roll_back_local_operations.test" + << "ui" + << uuid + << "wall" + << Date_t::fromMillisSinceEpoch(wallClockMillis)); +}; + int recordId = 0; OplogInterfaceMock::Operation makeOpAndRecordId(long long seconds, long long term = 1LL) { return std::make_pair(makeOp(seconds), RecordId(++recordId)); } +OplogInterfaceMock::Operation makeOpWithWallClockTimeAndRecordId(long long seconds, + long wallClockMillis, + long long term = 1LL) { + return std::make_pair(makeOpWithWallClockTime(seconds, wallClockMillis), RecordId(++recordId)); +} TEST(RollBackLocalOperationsTest, InvalidLocalOplogIterator) { class InvalidOplogInterface : public OplogInterface { @@ -220,9 +239,10 @@ TEST(SyncRollBackLocalOperationsTest, RemoteOplogMissing) { } TEST(SyncRollBackLocalOperationsTest, RollbackTwoOperations) { - auto commonOperation = makeOpAndRecordId(1); + auto commonOperation = makeOpWithWallClockTimeAndRecordId(1, 1 * 5000); + auto firstOpAfterCommonPoint = makeOpWithWallClockTimeAndRecordId(2, 2 * 60 * 60 * 24 * 1000); OplogInterfaceMock::Operations localOperations({ - makeOpAndRecordId(3), makeOpAndRecordId(2), commonOperation, + makeOpAndRecordId(3), firstOpAfterCommonPoint, commonOperation, }); auto i = localOperations.cbegin(); auto result = syncRollBackLocalOperations(OplogInterfaceMock(localOperations), @@ -238,6 +258,10 @@ TEST(SyncRollBackLocalOperationsTest, RollbackTwoOperations) { ASSERT_EQUALS(commonOperation.second, result.getValue().getRecordId()); ASSERT_FALSE(i == localOperations.cend()); ASSERT_BSONOBJ_EQ(commonOperation.first, i->first); + auto firstOplogEntryAfterCommonPoint = + uassertStatusOK(OplogEntry::parse(firstOpAfterCommonPoint.first)); + ASSERT_EQUALS(*result.getValue().getFirstOpWallClockTimeAfterCommonPoint(), + *firstOplogEntryAfterCommonPoint.getWallClockTime()); i++; ASSERT_TRUE(i == localOperations.cend()); } diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp index 85d919cae2f..b6aca140721 100644 --- a/src/mongo/db/repl/rollback_impl.cpp +++ b/src/mongo/db/repl/rollback_impl.cpp @@ -987,24 +987,28 @@ Status RollbackImpl::_checkAgainstTimeLimit( _rollbackStats.lastLocalOptime = topOfOplog.getOpTime(); auto topOfOplogWallOpt = topOfOplog.getWallClockTime(); - auto commonPointWallOpt = commonPoint.getWallClockTime(); + // We check the difference between the top of the oplog and the first oplog entry after the + // common point when computing the rollback time limit. + auto firstOpWallClockTimeAfterCommonPointOpt = + commonPoint.getFirstOpWallClockTimeAfterCommonPoint(); - // Only compute the difference if both the top of the oplog and the common point - // have wall clock times. - if (commonPointWallOpt && topOfOplogWallOpt) { + + // Only compute the difference if both the top of the oplog and the first oplog entry after the + // common point have wall clock times. + if (firstOpWallClockTimeAfterCommonPointOpt && topOfOplogWallOpt) { auto topOfOplogWallTime = topOfOplogWallOpt.get(); - auto commonPointWallTime = commonPointWallOpt.get(); + auto firstOpWallClockTimeAfterCommonPoint = firstOpWallClockTimeAfterCommonPointOpt.get(); - if (topOfOplogWallTime >= commonPointWallTime) { + if (topOfOplogWallTime >= firstOpWallClockTimeAfterCommonPoint) { - unsigned long long diff = - durationCount<Seconds>(Milliseconds(topOfOplogWallTime - commonPointWallTime)); + unsigned long long diff = durationCount<Seconds>( + Milliseconds(topOfOplogWallTime - firstOpWallClockTimeAfterCommonPoint)); _rollbackStats.lastLocalWallClockTime = topOfOplogWallTime; - _rollbackStats.commonPointWallClockTime = commonPointWallTime; + _rollbackStats.firstOpWallClockTimeAfterCommonPoint = + firstOpWallClockTimeAfterCommonPoint; auto timeLimit = static_cast<unsigned long long>(gRollbackTimeLimitSecs.loadRelaxed()); - if (diff > timeLimit) { return Status(ErrorCodes::UnrecoverableRollbackError, str::stream() << "not willing to roll back more than " << timeLimit @@ -1014,9 +1018,11 @@ Status RollbackImpl::_checkAgainstTimeLimit( } } else { - warning() << "Wall clock times on oplog entries not monotonically increasing. This " - "might indicate a backward clock skew. Time at common point: " - << commonPointWallTime << ". Time at top of oplog: " << topOfOplogWallTime; + warning() + << "Wall clock times on oplog entries not monotonically increasing. This " + "might indicate a backward clock skew. Time at first oplog after common point: " + << firstOpWallClockTimeAfterCommonPoint + << ". Time at top of oplog: " << topOfOplogWallTime; } } @@ -1212,14 +1218,18 @@ void RollbackImpl::_summarizeRollback(OperationContext* opCtx) const { if (_rollbackStats.commonPoint) { log() << "\tcommon point optime: " << *_rollbackStats.commonPoint; } - if (_rollbackStats.lastLocalWallClockTime && _rollbackStats.commonPointWallClockTime) { + if (_rollbackStats.lastLocalWallClockTime && + _rollbackStats.firstOpWallClockTimeAfterCommonPoint) { auto lastWall = *_rollbackStats.lastLocalWallClockTime; - auto commonWall = *_rollbackStats.commonPointWallClockTime; - unsigned long long diff = durationCount<Seconds>(Milliseconds(lastWall - commonWall)); + auto firstOpWallClockTimeAfterCommonPoint = + *_rollbackStats.firstOpWallClockTimeAfterCommonPoint; + unsigned long long diff = + durationCount<Seconds>(Milliseconds(lastWall - firstOpWallClockTimeAfterCommonPoint)); log() << "\tlast wall clock time on the branch of history rolled back: " << lastWall; - log() << "\tcommon point wall clock time: " << commonWall; + log() << "\twall clock time of the first operation after the common point: " + << firstOpWallClockTimeAfterCommonPoint; log() << "\tdifference in wall clock times: " << diff << " second(s)"; } if (_rollbackStats.truncateTimestamp) { diff --git a/src/mongo/db/repl/rollback_impl.h b/src/mongo/db/repl/rollback_impl.h index 29a33dfd548..ef1e1985cb5 100644 --- a/src/mongo/db/repl/rollback_impl.h +++ b/src/mongo/db/repl/rollback_impl.h @@ -102,9 +102,9 @@ struct RollbackStats { boost::optional<Date_t> lastLocalWallClockTime; /** - * The wall clock time at the common point, if known. + * The wall clock time of the first operation after the common point, if known. */ - boost::optional<Date_t> commonPointWallClockTime; + boost::optional<Date_t> firstOpWallClockTimeAfterCommonPoint; }; /** diff --git a/src/mongo/db/repl/rollback_impl.idl b/src/mongo/db/repl/rollback_impl.idl index d814c44de63..6cd99cadd88 100644 --- a/src/mongo/db/repl/rollback_impl.idl +++ b/src/mongo/db/repl/rollback_impl.idl @@ -44,7 +44,7 @@ server_parameters: description: >- This amount, measured in seconds, represents the maximum allowed rollback period. It is calculated by taking the difference of the wall clock times of the oplog entries - at the top of the local oplog and at the common point. + at the top of the local oplog and the first oplog entry after the common point. set_at: [startup, runtime] cpp_vartype: AtomicWord<int> cpp_varname: gRollbackTimeLimitSecs diff --git a/src/mongo/db/repl/rollback_impl_test.cpp b/src/mongo/db/repl/rollback_impl_test.cpp index 836eecd1076..77a28bfbe1c 100644 --- a/src/mongo/db/repl/rollback_impl_test.cpp +++ b/src/mongo/db/repl/rollback_impl_test.cpp @@ -298,6 +298,8 @@ protected: UUID kGenericUUID = unittest::assertGet(UUID::parse(kGenericUUIDStr)); + const long kOneDayInMillis = 60 * 60 * 24 * 1000; + private: long _counter = 100; }; @@ -486,11 +488,31 @@ TEST_F(RollbackImplTest, RollbackReturnsNoMatchingDocumentWhenNoCommonPoint) { } TEST_F(RollbackImplTest, RollbackSucceedsIfRollbackPeriodIsWithinTimeLimit) { - - // The default limit is 1 day, so we make the difference be just under a day. auto commonPoint = makeOpAndRecordId(makeOpWithWallClockTime(1, 5 * 1000)); - auto topOfOplog = makeOpAndRecordId(makeOpWithWallClockTime(2, 60 * 60 * 24 * 1000)); + // We use the difference of wall clock times between the top of the oplog and the first op after + // the common point to calculate the rollback time limit. + auto firstOpAfterCommonPoint = + makeOpAndRecordId(makeOpWithWallClockTime(2, 2 * kOneDayInMillis)); + // The default limit is 1 day, so we make the difference be just under a day. + auto topOfOplog = makeOpAndRecordId(makeOpWithWallClockTime(3, kOneDayInMillis)); + _remoteOplog->setOperations({commonPoint}); + ASSERT_OK(_insertOplogEntry(commonPoint.first)); + ASSERT_OK(_insertOplogEntry(firstOpAfterCommonPoint.first)); + ASSERT_OK(_insertOplogEntry(topOfOplog.first)); + _storageInterface->setStableTimestamp(nullptr, Timestamp(1, 1)); + + // Run rollback. + ASSERT_OK(_rollback->runRollback(_opCtx.get())); +} + +TEST_F(RollbackImplTest, RollbackSucceedsIfTopOfOplogIsFirstOpAfterCommonPoint) { + + auto commonPoint = makeOpAndRecordId(makeOpWithWallClockTime(1, 5 * 1000)); + // The default limit is 1 day, so we make the difference be 2 days. The rollback should still + // succeed since we calculate the difference of wall clock times between the top of the oplog + // and the first op after the common point which are both the same operation in this case. + auto topOfOplog = makeOpAndRecordId(makeOpWithWallClockTime(3, 2 * kOneDayInMillis)); _remoteOplog->setOperations({commonPoint}); ASSERT_OK(_insertOplogEntry(commonPoint.first)); ASSERT_OK(_insertOplogEntry(topOfOplog.first)); @@ -546,12 +568,16 @@ TEST_F(RollbackImplTest, RollbackKillsNecessaryOperations) { TEST_F(RollbackImplTest, RollbackFailsIfRollbackPeriodIsTooLong) { - // The default limit is 1 day, so we make the difference be 2 days. auto commonPoint = makeOpAndRecordId(makeOpWithWallClockTime(1, 5 * 1000)); - auto topOfOplog = makeOpAndRecordId(makeOpWithWallClockTime(2, 2 * 60 * 60 * 24 * 1000)); + auto opAfterCommonPoint = makeOpAndRecordId(makeOpWithWallClockTime(2, 5 * 1000)); + // We calculate the roll back time limit by comparing the difference between the top of the + // oplog and the first oplog entry after the commit point. The default limit is 1 day, so we + // make the difference be 2 days. + auto topOfOplog = makeOpAndRecordId(makeOpWithWallClockTime(3, 2 * 60 * 60 * 24 * 1000)); _remoteOplog->setOperations({commonPoint}); ASSERT_OK(_insertOplogEntry(commonPoint.first)); + ASSERT_OK(_insertOplogEntry(opAfterCommonPoint.first)); ASSERT_OK(_insertOplogEntry(topOfOplog.first)); _storageInterface->setStableTimestamp(nullptr, Timestamp(1, 1)); |