diff options
author | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2018-05-14 11:09:50 -0400 |
---|---|---|
committer | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2018-05-14 11:09:50 -0400 |
commit | 8e27c2b49ba17fe9b02695efb29a6322b56c2f23 (patch) | |
tree | d8b0682eee38a4655bf8d61b00143df6a1153a4d /src/mongo/db/index_builder.cpp | |
parent | 074c9725f1e6ad9b005ab08acc1aedf34e537d69 (diff) | |
download | mongo-8e27c2b49ba17fe9b02695efb29a6322b56c2f23.tar.gz |
SERVER-34896: Move timestamping responsibility on index completion to callers.

Previously, `MultiIndexBlockImpl` would use member state as a proxy for whether
an index build originated from a `createIndexes` command. This information was
used to determine if the completion of the index needed to be explicitly
timestamped, or whether an oplog entry would timestamp the write.

However, nodes in a primary state can build indexes during primary "catch-up"
or "drain". Those index builds originated from an oplog entry and thus will not
generate one on completion. These index builds need to be explicitly
timestamped.
Diffstat (limited to 'src/mongo/db/index_builder.cpp')
-rw-r--r-- | src/mongo/db/index_builder.cpp | 44 |
1 files changed, 43 insertions, 1 deletions
```diff
diff --git a/src/mongo/db/index_builder.cpp b/src/mongo/db/index_builder.cpp
index d915fc6590b..8dbef6caa25 100644
--- a/src/mongo/db/index_builder.cpp
+++ b/src/mongo/db/index_builder.cpp
@@ -40,6 +40,7 @@
 #include "mongo/db/concurrency/write_conflict_exception.h"
 #include "mongo/db/curop.h"
 #include "mongo/db/db_raii.h"
+#include "mongo/db/logical_clock.h"
 #include "mongo/db/repl/timestamp_block.h"
 #include "mongo/util/assert_util.h"
 #include "mongo/util/log.h"
@@ -52,6 +53,42 @@ using std::endl;
 AtomicUInt32 IndexBuilder::_indexBuildCount;
 
 namespace {
+
+/**
+ * Returns true if writes to the catalog entry for the input namespace require being
+ * timestamped. A ghost write is when the operation is not committed with an oplog entry and
+ * implies the caller will look at the logical clock to choose a time to use.
+ */
+bool requiresGhostCommitTimestamp(OperationContext* opCtx, NamespaceString nss) {
+    if (!nss.isReplicated() || nss.coll().startsWith("tmp.mr")) {
+        return false;
+    }
+
+    auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+    if (!replCoord->getSettings().usingReplSets()) {
+        return false;
+    }
+
+    // If there is a commit timestamp already assigned, there's no need to explicitly assign a
+    // timestamp. This case covers foreground index builds.
+    if (!opCtx->recoveryUnit()->getCommitTimestamp().isNull()) {
+        return false;
+    }
+
+    // Only oplog entries (including a user's `applyOps` command) construct indexes via
+    // `IndexBuilder`. Nodes in `startup` may not yet have initialized the `LogicalClock`, however
+    // index builds during startup replication recovery must be timestamped. These index builds
+    // are foregrounded and timestamp their catalog writes with a "commit timestamp". Nodes in the
+    // oplog application phase of initial sync (`startup2`) must not timestamp index builds before
+    // the `initialDataTimestamp`.
+    const auto memberState = replCoord->getMemberState();
+    if (memberState.startup() || memberState.startup2()) {
+        return false;
+    }
+
+    return true;
+}
+
 // Synchronization tools when replication spawns a background index in a new thread.
 // The bool is 'true' when a new background index has started in a new thread but the
 // parent thread has not yet synchronized with it.
@@ -202,9 +239,14 @@ Status IndexBuilder::_build(OperationContext* opCtx,
         if (allowBackgroundBuilding) {
             dbLock->relockWithMode(MODE_X);
         }
-        writeConflictRetry(opCtx, "Commit index build", ns.ns(), [opCtx, &indexer] {
+        writeConflictRetry(opCtx, "Commit index build", ns.ns(), [opCtx, &indexer, &ns] {
             WriteUnitOfWork wunit(opCtx);
             indexer.commit();
+            if (requiresGhostCommitTimestamp(opCtx, ns)) {
+                fassert(50701,
+                        opCtx->recoveryUnit()->setTimestamp(
+                            LogicalClock::get(opCtx)->getClusterTime().asTimestamp()));
+            }
             wunit.commit();
         });
 
```