diff options
author | Kyle Suarez <kyle.suarez@mongodb.com> | 2018-03-29 15:15:56 -0400 |
---|---|---|
committer | Kyle Suarez <kyle.suarez@mongodb.com> | 2018-03-29 15:15:56 -0400 |
commit | e5f6c368bd34bb9122c0085401d2ead3d4d3f78c (patch) | |
tree | db8ed05c758cc51d91bb94620b64e7cb1f175d94 /src/mongo/db/repl | |
parent | e1cb7f58d887815ae696a824291fdfc59fc35861 (diff) | |
download | mongo-e5f6c368bd34bb9122c0085401d2ead3d4d3f78c.tar.gz |
SERVER-21693 generate a summary after rollback via recover to timestamp
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r-- | src/mongo/db/repl/rollback_impl.cpp | 125 | ||||
-rw-r--r-- | src/mongo/db/repl/rollback_impl.h | 57 |
2 files changed, 178 insertions, 4 deletions
diff --git a/src/mongo/db/repl/rollback_impl.cpp b/src/mongo/db/repl/rollback_impl.cpp index db12fe6b1f6..2873426a767 100644 --- a/src/mongo/db/repl/rollback_impl.cpp +++ b/src/mongo/db/repl/rollback_impl.cpp @@ -67,6 +67,11 @@ RollbackImpl::Listener kNoopListener; // stable timestamp. constexpr bool createRollbackFilesDefault = true; MONGO_EXPORT_SERVER_PARAMETER(createRollbackDataFiles, bool, createRollbackFilesDefault); + +// The name of the insert, update and delete commands as found in oplog command entries. +constexpr auto kInsertCmdName = "insert"_sd; +constexpr auto kUpdateCmdName = "update"_sd; +constexpr auto kDeleteCmdName = "delete"_sd; } // namespace constexpr const char* RollbackImpl::kRollbackRemoveSaverType; @@ -114,6 +119,8 @@ RollbackImpl::~RollbackImpl() { } Status RollbackImpl::runRollback(OperationContext* opCtx) { + _rollbackStats.startTime = opCtx->getServiceContext()->getFastClockSource()->now(); + auto status = _transitionToRollback(opCtx); if (!status.isOK()) { return status; @@ -123,6 +130,7 @@ Status RollbackImpl::runRollback(OperationContext* opCtx) { // After successfully transitioning to the ROLLBACK state, we must always transition back to // SECONDARY, even if we fail at any point during the rollback process. ON_BLOCK_EXIT([this, opCtx] { _transitionFromRollbackToSecondary(opCtx); }); + ON_BLOCK_EXIT([this, opCtx] { _summarizeRollback(opCtx); }); // Wait for all background index builds to complete before starting the rollback process. status = _awaitBgIndexCompletion(opCtx); @@ -131,11 +139,27 @@ Status RollbackImpl::runRollback(OperationContext* opCtx) { } _listener->onBgIndexesComplete(); + // Save the latest optime on the branch of history being rolled back (for informational + // purposes). + { + auto localOplogIter = _localOplog->makeIterator(); + const auto topOfOplog = localOplogIter->next(); + if (topOfOplog.isOK()) { + const auto lastEntryBSON = topOfOplog.getValue().first; + const auto lastEntryOptime = OpTime::parseFromOplogEntry(lastEntryBSON); + if (lastEntryOptime.isOK()) { + _rollbackStats.lastLocalOptime = std::move(lastEntryOptime.getValue()); + } + } + } + auto commonPointSW = _findCommonPoint(opCtx); if (!commonPointSW.isOK()) { return commonPointSW.getStatus(); } - _listener->onCommonPointFound(commonPointSW.getValue().first.getTimestamp()); + const auto commonPoint = commonPointSW.getValue().first; + _rollbackStats.commonPoint = commonPoint; + _listener->onCommonPointFound(commonPoint.getTimestamp()); // Increment the Rollback ID of this node. The Rollback ID is a natural number that it is // incremented by 1 every time a rollback occurs. Note that the Rollback ID must be incremented @@ -144,6 +168,7 @@ Status RollbackImpl::runRollback(OperationContext* opCtx) { if (!status.isOK()) { return status; } + _rollbackStats.rollbackId = _replicationProcess->getRollbackID(); // Write a rollback file for each namespace that has documents that would be deleted by // rollback. @@ -157,8 +182,16 @@ Status RollbackImpl::runRollback(OperationContext* opCtx) { if (!stableTimestampSW.isOK()) { return stableTimestampSW.getStatus(); } + _rollbackStats.stableTimestamp = stableTimestampSW.getValue(); _listener->onRecoverToStableTimestamp(stableTimestampSW.getValue()); + // Log the total number of insert and update operations that have been rolled back as a result + // of recovering to the stable timestamp. + log() << "Rollback reverted " << _observerInfo.rollbackCommandCounts[kInsertCmdName] + << " insert operations, " << _observerInfo.rollbackCommandCounts[kUpdateCmdName] + << " update operations and " << _observerInfo.rollbackCommandCounts[kDeleteCmdName] + << " delete operations."; + // During replication recovery, we truncate all oplog entries with timestamps greater than or // equal to the oplog truncate after point. As a result, we must find the oplog entry after // the common point so we do not truncate the common point itself. If we entered rollback, @@ -177,6 +210,7 @@ Status RollbackImpl::runRollback(OperationContext* opCtx) { // Note that storage engine timestamp recovery only restores the database *data* to a stable // timestamp, but does not revert the oplog, which must be done as part of the rollback process. _replicationProcess->getConsistencyMarkers()->setOplogTruncateAfterPoint(opCtx, truncatePoint); + _rollbackStats.truncateTimestamp = truncatePoint; _listener->onSetOplogTruncateAfterPoint(truncatePoint); // Align the drop pending reaper state with what's on disk. Oplog recovery depends on those @@ -195,13 +229,14 @@ Status RollbackImpl::runRollback(OperationContext* opCtx) { // oplog, which should now be at the common point. _replicationCoordinator->resetLastOpTimesFromOplog( opCtx, ReplicationCoordinator::DataConsistency::Consistent); - status = _triggerOpObserver(opCtx); if (!status.isOK()) { return status; } _listener->onRollbackOpObserver(_observerInfo); + log() << "Rollback complete"; + return Status::OK(); } @@ -351,6 +386,8 @@ StatusWith<std::set<NamespaceString>> RollbackImpl::_namespacesForOp(const Oplog * structures. */ Status RollbackImpl::_processRollbackOp(const OplogEntry& oplogEntry) { + ++_observerInfo.numberOfEntriesObserved; + NamespaceString opNss = oplogEntry.getNamespace(); OpTypeEnum opType = oplogEntry.getOpType(); @@ -397,13 +434,17 @@ Status RollbackImpl::_processRollbackOp(const OplogEntry& oplogEntry) { // a rollback file. if (opType == OpTypeEnum::kInsert || opType == OpTypeEnum::kUpdate) { const auto uuid = oplogEntry.getUuid(); - dassert(uuid); + invariant(uuid, + str::stream() << "Oplog entry to roll back is unexpectedly missing a UUID: " + << redact(oplogEntry.toBSON())); const auto idElem = oplogEntry.getIdElement(); if (!idElem.eoo()) { // We call BSONElement::wrap() on each _id element to create a new BSONObj with an owned // buffer, as the underlying storage may be gone when we access this map to write // rollback files. _observerInfo.rollbackDeletedIdsMap[uuid.get()].insert(idElem.wrap()); + const auto cmdName = opType == OpTypeEnum::kInsert ? kInsertCmdName : kUpdateCmdName; + ++_observerInfo.rollbackCommandCounts[cmdName]; } } @@ -426,6 +467,14 @@ Status RollbackImpl::_processRollbackOp(const OplogEntry& oplogEntry) { } } + // Keep count of major commands that will be rolled back. + if (opType == OpTypeEnum::kCommand) { + ++_observerInfo.rollbackCommandCounts[oplogEntry.getObject().firstElementFieldName()]; + } + if (opType == OpTypeEnum::kDelete) { + ++_observerInfo.rollbackCommandCounts[kDeleteCmdName]; + } + return Status::OK(); } @@ -557,7 +606,32 @@ void RollbackImpl::_writeRollbackFileForNamespace(OperationContext* opCtx, const SimpleBSONObjUnorderedSet& idSet) { Helpers::RemoveSaver removeSaver(kRollbackRemoveSaverType, nss.ns(), kRollbackRemoveSaverWhy); log() << "Preparing to write deleted documents to a rollback file for collection " << nss.ns() - << " with uuid " << uuid.toString() << " to " << removeSaver.fileName(); + << " with uuid " << uuid.toString() << " to " << removeSaver.file().generic_string(); + + // The RemoveSaver will save the data files in a directory structure similar to the following: + // + // rollback + // ├── db.collection + // │ └── removed.2018-03-20T20-23-01.21.bson + // ├── otherdb.othercollection + // │ ├── removed.2018-03-20T20-23-01.18.bson + // │ └── removed.2018-03-20T20-23-01.19.bson + // + // If this is the first data directory created, we save the full directory path in + // _rollbackStats. Otherwise, we store the longest common prefix of the two directories. + const auto& newDirectoryPath = removeSaver.root().generic_string(); + if (!_rollbackStats.rollbackDataFileDirectory) { + _rollbackStats.rollbackDataFileDirectory = newDirectoryPath; + } else { + const auto& existingDirectoryPath = *_rollbackStats.rollbackDataFileDirectory; + const auto& prefixEnd = std::mismatch(newDirectoryPath.begin(), + newDirectoryPath.end(), + existingDirectoryPath.begin(), + existingDirectoryPath.end()) + .first; + _rollbackStats.rollbackDataFileDirectory = std::string(newDirectoryPath.begin(), prefixEnd); + } + for (auto&& id : idSet) { // StorageInterface::findById() does not respect the collation, but because we are using // exact _id fields recorded in the oplog, we can get away with binary string @@ -640,5 +714,48 @@ void RollbackImpl::_resetDropPendingState(OperationContext* opCtx) { } } +void RollbackImpl::_summarizeRollback(OperationContext* opCtx) const { + log() << "Rollback summary:"; + log() << "\tstart time: " << _rollbackStats.startTime; + log() << "\tend time: " << opCtx->getServiceContext()->getFastClockSource()->now(); + log() << "\tsync source: " << _remoteOplog->hostAndPort().toString(); + log() << "\trollback data file directory: " + << _rollbackStats.rollbackDataFileDirectory.value_or("none; no files written"); + if (_rollbackStats.rollbackId) { + log() << "\trollback id: " << *_rollbackStats.rollbackId; + } + if (_rollbackStats.lastLocalOptime) { + log() << "\tlast optime on branch of history rolled back: " + << *_rollbackStats.lastLocalOptime; + } + if (_rollbackStats.commonPoint) { + log() << "\tcommon point: " << *_rollbackStats.commonPoint; + } + if (_rollbackStats.truncateTimestamp) { + log() << "\ttruncate timestamp: " << *_rollbackStats.truncateTimestamp; + } + if (_rollbackStats.stableTimestamp) { + log() << "\tstable timestamp: " << *_rollbackStats.stableTimestamp; + } + log() << "\tshard identity document rolled back: " << std::boolalpha + << _observerInfo.shardIdentityRolledBack; + log() << "\tconfig server config version document rolled back: " << std::boolalpha + << _observerInfo.configServerConfigVersionRolledBack; + log() << "\taffected sessions: " << (_observerInfo.rollbackSessionIds.empty() ? "none" : ""); + for (const auto& sessionId : _observerInfo.rollbackSessionIds) { + log() << "\t\t" << sessionId; + } + log() << "\taffected namespaces: " << (_observerInfo.rollbackNamespaces.empty() ? "none" : ""); + for (const auto& nss : _observerInfo.rollbackNamespaces) { + log() << "\t\t" << nss.ns(); + } + log() << "\tcounts of interesting commands rolled back: " + << (_observerInfo.rollbackCommandCounts.empty() ? "none" : ""); + for (const auto& entry : _observerInfo.rollbackCommandCounts) { + log() << "\t\t" << entry.first << ": " << entry.second; + } + log() << "\ttotal number of entries rolled back (including no-ops): " + << _observerInfo.numberOfEntriesObserved; +} } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/rollback_impl.h b/src/mongo/db/repl/rollback_impl.h index bd671ff5309..9be56195c22 100644 --- a/src/mongo/db/repl/rollback_impl.h +++ b/src/mongo/db/repl/rollback_impl.h @@ -47,6 +47,55 @@ class ReplicationCoordinator; class ReplicationProcess; /** + * Tracks statistics about rollback, and is used to generate a summary about what has occurred. + * Because it is possible for rollback to exit early, fields are initialized to boost::none and are + * populated with actual values during the rollback process. + */ +struct RollbackStats { + /** + * The wall clock time when rollback started. + */ + Date_t startTime; + + /** + * The wall clock time when rollback completed, either successfully or unsuccessfully. + */ + Date_t endTime; + + /** + * The id number generated for this rollback event. + */ + boost::optional<int> rollbackId; + + /** + * The last optime on the branch of history being rolled back. + */ + boost::optional<OpTime> lastLocalOptime; + + /** + * The optime of the latest shared oplog entry between this node and the sync source. + */ + boost::optional<OpTime> commonPoint; + + /** + * The value of the oplog truncate timestamp. This is the timestamp of the entry immediately + * after the common point on the local oplog (that is, on the branch of history being rolled + * back). + */ + boost::optional<Timestamp> truncateTimestamp; + + /** + * The value of the stable timestamp to which rollback recovered. + */ + boost::optional<Timestamp> stableTimestamp; + + /** + * The directory containing rollback data files, if any were written. + */ + boost::optional<std::string> rollbackDataFileDirectory; +}; + +/** * During steady state replication, it is possible to find the local server in a state where it * cannot replicate from a sync source. This can happen if the local server has gone offline and * comes back to find a new primary with an inconsistent set of operations in its oplog from the @@ -335,6 +384,11 @@ private: Status _writeRollbackFiles(OperationContext* opCtx); /** + * Logs a summary of what has occurred so far during rollback to the server log. + */ + void _summarizeRollback(OperationContext* opCtx) const; + + /** * Aligns the drop pending reaper's state with the catalog. */ void _resetDropPendingState(OperationContext* opCtx); @@ -366,6 +420,9 @@ private: // Contains information about the rollback that will be passed along to the rollback OpObserver // method. OpObserver::RollbackObserverInfo _observerInfo = {}; // (N) + + // Holds information about this rollback event. + RollbackStats _rollbackStats; // (N) }; } // namespace repl |