diff options
author | Judah Schvimer <judah@mongodb.com> | 2017-06-22 17:52:14 -0400 |
---|---|---|
committer | Judah Schvimer <judah@mongodb.com> | 2017-06-22 17:52:14 -0400 |
commit | 669a3c4c28b74d8a68f1556c9e185439380be61a (patch) | |
tree | f0dc5ab58a93c600a62bbc9070f05f841f6a992b | |
parent | 19fb2eca839609a0b6d34faa11daee381f4eeff4 (diff) | |
download | mongo-669a3c4c28b74d8a68f1556c9e185439380be61a.tar.gz |
SERVER-29254 move OplogTruncateAfterPoint into its own collection
-rw-r--r-- | jstests/replsets/clean_shutdown_oplog_state.js | 15 | ||||
-rw-r--r-- | jstests/replsets/oplog_replay_on_startup.js | 15 | ||||
-rw-r--r-- | jstests/replsets/oplog_truncated_on_recovery.js | 17 | ||||
-rw-r--r-- | src/mongo/db/repl/SConscript | 2 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_consistency_markers.h | 16 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_consistency_markers.idl | 13 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_consistency_markers_impl.cpp | 118 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_consistency_markers_impl.h | 28 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_consistency_markers_impl_test.cpp | 154 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_consistency_markers_mock.cpp | 12 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_consistency_markers_mock.h | 6 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_external_state_impl.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/repl/sync_tail.cpp | 4 |
13 files changed, 303 insertions, 123 deletions
diff --git a/jstests/replsets/clean_shutdown_oplog_state.js b/jstests/replsets/clean_shutdown_oplog_state.js index 5ac2f72d556..ec7efacf15e 100644 --- a/jstests/replsets/clean_shutdown_oplog_state.js +++ b/jstests/replsets/clean_shutdown_oplog_state.js @@ -59,8 +59,8 @@ // Following clean shutdown of a node, the oplog must exactly match the applied operations. // Additionally, the begin field must not be in the minValid document, the ts must match the - // top of the oplog (SERVER-25353), and the oplogDeleteFromPoint must be null (SERVER-7200 and - // SERVER-25071). + // top of the oplog (SERVER-25353), and the oplogTruncateAfterPoint must be null (SERVER-7200 + // and SERVER-25071). var oplogDoc = conn.getCollection('local.oplog.rs') .find({ns: 'test.coll'}) .sort({$natural: -1}) @@ -68,12 +68,19 @@ var collDoc = conn.getCollection('test.coll').find().sort({_id: -1}).limit(1)[0]; var minValidDoc = conn.getCollection('local.replset.minvalid').find().sort({$natural: -1}).limit(1)[0]; - printjson({oplogDoc: oplogDoc, collDoc: collDoc, minValidDoc: minValidDoc}); + var oplogTruncateAfterPointDoc = + conn.getCollection('local.replset.oplogTruncateAfterPoint').find().limit(1)[0]; + printjson({ + oplogDoc: oplogDoc, + collDoc: collDoc, + minValidDoc: minValidDoc, + oplogTruncateAfterPointDoc: oplogTruncateAfterPointDoc + }); try { assert.eq(collDoc._id, oplogDoc.o._id); assert(!('begin' in minValidDoc), 'begin in minValidDoc'); - assert.eq(minValidDoc.oplogDeleteFromPoint, Timestamp()); assert.eq(minValidDoc.ts, oplogDoc.ts); + assert.eq(oplogTruncateAfterPointDoc.oplogTruncateAfterPoint, Timestamp()); } catch (e) { // TODO remove once SERVER-25777 is resolved. jsTest.log( diff --git a/jstests/replsets/oplog_replay_on_startup.js b/jstests/replsets/oplog_replay_on_startup.js index 248d04aa1a3..8684436fee7 100644 --- a/jstests/replsets/oplog_replay_on_startup.js +++ b/jstests/replsets/oplog_replay_on_startup.js @@ -45,6 +45,7 @@ assert.neq(null, conn, "failed to restart"); var oplog = conn.getCollection('local.oplog.rs'); var minValidColl = conn.getCollection('local.replset.minvalid'); + var oplogTruncateAfterColl = conn.getCollection('local.replset.oplogTruncateAfterPoint'); var coll = conn.getCollection(ns); // Reset state to empty. @@ -78,13 +79,16 @@ t: term, }, - oplogDeleteFromPoint: ts(deletePoint), - // minvalid: t: term, ts: ts(minValid), }; + var injectedOplogTruncateAfterPointDoc = { + _id: "oplogTruncateAfterPoint", + oplogTruncateAfterPoint: ts(deletePoint) + }; + // This weird mechanism is the only way to bypass mongod's attempt to fill in null // Timestamps. assert.writeOK(minValidColl.remove({})); @@ -93,6 +97,13 @@ injectedMinValidDoc, "If the Timestamps differ, the server may be filling in the null timestamps"); + assert.writeOK(oplogTruncateAfterColl.remove({})); + assert.writeOK(oplogTruncateAfterColl.update( + {}, {$set: injectedOplogTruncateAfterPointDoc}, {upsert: true})); + assert.eq(oplogTruncateAfterColl.findOne(), + injectedOplogTruncateAfterPointDoc, + "If the Timestamps differ, the server may be filling in the null timestamps"); + try { conn = rst.restart(0); // Restart in replSet mode again. } catch (e) { diff --git a/jstests/replsets/oplog_truncated_on_recovery.js b/jstests/replsets/oplog_truncated_on_recovery.js index 477da7b4d92..cca4051b369 100644 --- a/jstests/replsets/oplog_truncated_on_recovery.js +++ b/jstests/replsets/oplog_truncated_on_recovery.js @@ -36,6 +36,7 @@ var testDB = master.getDB("test"); var localDB = master.getDB("local"); var minvalidColl = localDB["replset.minvalid"]; + var oplogTruncateAfterColl = localDB["replset.oplogTruncateAfterPoint"]; // Write op log(assert.writeOK(testDB.foo.save({_id: 1, a: 1}, {writeConcern: {w: 1}}))); @@ -63,13 +64,23 @@ ts: farFutureTS, t: NumberLong(-1), begin: primaryOpTime, - oplogDeleteFromPoint: divergedTS }, {upsert: true, writeConcern: {w: 1}}))); + log(assert.writeOK(oplogTruncateAfterColl.update({_id: "oplogTruncateAfterPoint"}, + {oplogTruncateAfterPoint: divergedTS}, + {upsert: true, writeConcern: {w: 1}}))); + // Insert a diverged oplog entry that will be truncated after restart. - log(assert.writeOK(localDB.oplog.rs.insert( - {_id: 0, ts: divergedTS, op: "n", h: NumberLong(0), t: NumberLong(-1)}))); + log(assert.writeOK(localDB.oplog.rs.insert({ + _id: ObjectId(), + ns: "", + ts: divergedTS, + op: "n", + h: NumberLong(0), + t: NumberLong(-1), + o: {} + }))); log(localDB.oplog.rs.find().toArray()); log(assert.commandWorked(localDB.adminCommand("replSetGetStatus"))); log("restart primary"); diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 88398e6a9cf..6b416f7ab50 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -204,7 +204,7 @@ env.Library( LIBDEPS=[ 'optime', 'repl_coordinator_impl', - 'replication_consistency_markers_idl', + 'replication_consistency_markers_idl', ], ) diff --git a/src/mongo/db/repl/replication_consistency_markers.h b/src/mongo/db/repl/replication_consistency_markers.h index d6cb43126cb..aadc2491b62 100644 --- a/src/mongo/db/repl/replication_consistency_markers.h +++ b/src/mongo/db/repl/replication_consistency_markers.h @@ -59,7 +59,16 @@ class StorageInterface; * ts: <Timestamp>, * t: <long long> * }, // field for 'appliedThrough' - * oplogDeleteFromPoint: <Timestamp> + * } + * + * The oplogTruncateAfterPoint document, in 'local.replset.oplogTruncateAfterPoint', is used to + * indicate how much of the oplog is consistent and where the oplog should be truncated when + * entering recovery. + * + * Example of all fields: + * { + * _id: 'oplogTruncateAfterPoint', + * oplogTruncateAfterPoint: <Timestamp> * } * * See below for explanations of each field. @@ -142,8 +151,9 @@ public: * * If null, no documents should be deleted. */ - virtual void setOplogDeleteFromPoint(OperationContext* opCtx, const Timestamp& timestamp) = 0; - virtual Timestamp getOplogDeleteFromPoint(OperationContext* opCtx) const = 0; + virtual void setOplogTruncateAfterPoint(OperationContext* opCtx, + const Timestamp& timestamp) = 0; + virtual Timestamp getOplogTruncateAfterPoint(OperationContext* opCtx) const = 0; // -------- Applied Through ---------- diff --git a/src/mongo/db/repl/replication_consistency_markers.idl b/src/mongo/db/repl/replication_consistency_markers.idl index 770b950ab33..d7a7ba6be00 100644 --- a/src/mongo/db/repl/replication_consistency_markers.idl +++ b/src/mongo/db/repl/replication_consistency_markers.idl @@ -45,10 +45,6 @@ structs: cpp_name: minValidTerm type: safeInt64 description: "The term for the OpTime at which the data on disk will be consistent; -1 for PV0." - oplogDeleteFromPoint: - type: timestamp - optional: true - description: "The timestamp of the first oplog entry in a batch when we are writing oplog entries to the oplog after which the oplog may be inconsistent." begin: cpp_name: appliedThrough type: optime @@ -63,3 +59,12 @@ structs: type: objectid optional: true # This is automatically created when the document is upserted description: "An objectid that is not used but is automatically generated" + OplogTruncateAfterPointDocument: + description: A document in which the server stores information on where to truncate the oplog on unclean shutdown. + fields: + oplogTruncateAfterPoint: + type: timestamp + description: "The timestamp of the first oplog entry in a batch when we are writing oplog entries to the oplog after which the oplog may be inconsistent." + _id: + type: string + description: "Always set to 'oplogTruncateAfterPoint' to easily retrieve it." diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.cpp b/src/mongo/db/repl/replication_consistency_markers_impl.cpp index 6e262aa7b4a..a49da8acdaa 100644 --- a/src/mongo/db/repl/replication_consistency_markers_impl.cpp +++ b/src/mongo/db/repl/replication_consistency_markers_impl.cpp @@ -1,4 +1,3 @@ - /** * Copyright (C) 2017 MongoDB Inc. * @@ -44,20 +43,30 @@ namespace mongo { namespace repl { constexpr StringData ReplicationConsistencyMarkersImpl::kDefaultMinValidNamespace; +constexpr StringData ReplicationConsistencyMarkersImpl::kDefaultOplogTruncateAfterPointNamespace; +constexpr StringData ReplicationConsistencyMarkersImpl::kOldOplogDeleteFromPointFieldName; namespace { const BSONObj kInitialSyncFlag(BSON(MinValidDocument::kInitialSyncFlagFieldName << true)); +const BSONObj kOplogTruncateAfterPointId(BSON("_id" + << "oplogTruncateAfterPoint")); } // namespace ReplicationConsistencyMarkersImpl::ReplicationConsistencyMarkersImpl( StorageInterface* storageInterface) : ReplicationConsistencyMarkersImpl( storageInterface, - NamespaceString(ReplicationConsistencyMarkersImpl::kDefaultMinValidNamespace)) {} + NamespaceString(ReplicationConsistencyMarkersImpl::kDefaultMinValidNamespace), + NamespaceString( + ReplicationConsistencyMarkersImpl::kDefaultOplogTruncateAfterPointNamespace)) {} ReplicationConsistencyMarkersImpl::ReplicationConsistencyMarkersImpl( - StorageInterface* storageInterface, NamespaceString minValidNss) - : _storageInterface(storageInterface), _minValidNss(minValidNss) {} + StorageInterface* storageInterface, + NamespaceString minValidNss, + NamespaceString oplogTruncateAfterPointNss) + : _storageInterface(storageInterface), + _minValidNss(minValidNss), + _oplogTruncateAfterPointNss(oplogTruncateAfterPointNss) {} boost::optional<MinValidDocument> ReplicationConsistencyMarkersImpl::_getMinValidDocument( OperationContext* opCtx) const { @@ -83,9 +92,9 @@ void ReplicationConsistencyMarkersImpl::_updateMinValidDocument(OperationContext // If the collection doesn't exist, create it and try again. if (status == ErrorCodes::NamespaceNotFound) { status = _storageInterface->createCollection(opCtx, _minValidNss, CollectionOptions()); - if (status.isOK()) { - status = _storageInterface->putSingleton(opCtx, _minValidNss, updateSpec); - } + fassertStatusOK(40509, status); + + status = _storageInterface->putSingleton(opCtx, _minValidNss, updateSpec); } fassertStatusOK(40467, status); @@ -96,14 +105,17 @@ void ReplicationConsistencyMarkersImpl::initializeMinValidDocument(OperationCont // This initializes the values of the required fields if they are not already set. // If one of the fields is already set, the $max will prefer the existing value since it - // will always be greater than the provided ones. + // will always be greater than the provided ones. We unset the old 'oplogDeleteFromPoint' + // field so that we can remove it from the IDL struct. This is required because servers + // upgrading from 3.4 may have created an 'oplogDeleteFromPoint' field already. The field + // is guaranteed to be empty on clean shutdown and thus on upgrade, but may still exist. _updateMinValidDocument(opCtx, BSON("$max" << BSON(MinValidDocument::kMinValidTimestampFieldName << Timestamp() << MinValidDocument::kMinValidTermFieldName - << OpTime::kUninitializedTerm - << MinValidDocument::kOplogDeleteFromPointFieldName - << Timestamp()))); + << OpTime::kUninitializedTerm) + << "$unset" + << BSON(kOldOplogDeleteFromPointFieldName << 1))); } bool ReplicationConsistencyMarkersImpl::getInitialSyncFlag(OperationContext* opCtx) const { @@ -179,28 +191,6 @@ void ReplicationConsistencyMarkersImpl::setMinValidToAtLeast(OperationContext* o << minValid.getTerm()))); } -void ReplicationConsistencyMarkersImpl::setOplogDeleteFromPoint(OperationContext* opCtx, - const Timestamp& timestamp) { - LOG(3) << "setting oplog delete from point to: " << timestamp.toStringPretty(); - _updateMinValidDocument( - opCtx, BSON("$set" << BSON(MinValidDocument::kOplogDeleteFromPointFieldName << timestamp))); -} - -Timestamp ReplicationConsistencyMarkersImpl::getOplogDeleteFromPoint( - OperationContext* opCtx) const { - auto doc = _getMinValidDocument(opCtx); - invariant(doc); // Initialized at startup so it should never be missing. - - auto oplogDeleteFromPoint = doc->getOplogDeleteFromPoint(); - if (!oplogDeleteFromPoint) { - LOG(3) << "No oplogDeleteFromPoint timestamp set, returning empty timestamp."; - return Timestamp(); - } - - LOG(3) << "returning oplog delete from point: " << oplogDeleteFromPoint.get(); - return oplogDeleteFromPoint.get(); -} - void ReplicationConsistencyMarkersImpl::setAppliedThrough(OperationContext* opCtx, const OpTime& optime) { LOG(3) << "setting appliedThrough to: " << optime.toString() << "(" << optime.toBSON() << ")"; @@ -228,5 +218,67 @@ OpTime ReplicationConsistencyMarkersImpl::getAppliedThrough(OperationContext* op return appliedThrough.get(); } +boost::optional<OplogTruncateAfterPointDocument> +ReplicationConsistencyMarkersImpl::_getOplogTruncateAfterPointDocument( + OperationContext* opCtx) const { + auto doc = _storageInterface->findById( + opCtx, _oplogTruncateAfterPointNss, kOplogTruncateAfterPointId["_id"]); + + if (!doc.isOK()) { + if (doc.getStatus() == ErrorCodes::NoSuchKey || + doc.getStatus() == ErrorCodes::NamespaceNotFound) { + return boost::none; + } else { + // Fails if there is an error other than the collection being missing or being empty. + fassertFailedWithStatus(40510, doc.getStatus()); + } + } + + auto oplogTruncateAfterPoint = OplogTruncateAfterPointDocument::parse( + IDLParserErrorContext("OplogTruncateAfterPointDocument"), doc.getValue()); + return oplogTruncateAfterPoint; +} + +void ReplicationConsistencyMarkersImpl::_upsertOplogTruncateAfterPointDocument( + OperationContext* opCtx, const BSONObj& updateSpec) { + auto status = _storageInterface->upsertById( + opCtx, _oplogTruncateAfterPointNss, kOplogTruncateAfterPointId["_id"], updateSpec); + + // If the collection doesn't exist, creates it and tries again. + if (status == ErrorCodes::NamespaceNotFound) { + status = _storageInterface->createCollection( + opCtx, _oplogTruncateAfterPointNss, CollectionOptions()); + fassertStatusOK(40511, status); + + status = _storageInterface->upsertById( + opCtx, _oplogTruncateAfterPointNss, kOplogTruncateAfterPointId["_id"], updateSpec); + } + + fassertStatusOK(40512, status); +} + +void ReplicationConsistencyMarkersImpl::setOplogTruncateAfterPoint(OperationContext* opCtx, + const Timestamp& timestamp) { + LOG(3) << "setting oplog truncate after point to: " << timestamp.toBSON(); + _upsertOplogTruncateAfterPointDocument( + opCtx, + BSON("$set" << BSON(OplogTruncateAfterPointDocument::kOplogTruncateAfterPointFieldName + << timestamp))); +} + +Timestamp ReplicationConsistencyMarkersImpl::getOplogTruncateAfterPoint( + OperationContext* opCtx) const { + auto doc = _getOplogTruncateAfterPointDocument(opCtx); + if (!doc) { + LOG(3) << "Returning empty oplog truncate after point since document did not exist"; + return {}; + } + + Timestamp out = doc->getOplogTruncateAfterPoint(); + + LOG(3) << "returning oplog truncate after point: " << out; + return out; +} + } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.h b/src/mongo/db/repl/replication_consistency_markers_impl.h index 6ca4a315f5c..9de5a545a29 100644 --- a/src/mongo/db/repl/replication_consistency_markers_impl.h +++ b/src/mongo/db/repl/replication_consistency_markers_impl.h @@ -49,10 +49,15 @@ class ReplicationConsistencyMarkersImpl : public ReplicationConsistencyMarkers { public: static constexpr StringData kDefaultMinValidNamespace = "local.replset.minvalid"_sd; + static constexpr StringData kDefaultOplogTruncateAfterPointNamespace = + "local.replset.oplogTruncateAfterPoint"_sd; + // TODO: Remove this constant and its usage in minValid initialization in 3.8. + static constexpr StringData kOldOplogDeleteFromPointFieldName = "oplogDeleteFromPoint"_sd; explicit ReplicationConsistencyMarkersImpl(StorageInterface* storageInterface); ReplicationConsistencyMarkersImpl(StorageInterface* storageInterface, - NamespaceString minValidNss); + NamespaceString minValidNss, + NamespaceString oplogTruncateAfterNss); void initializeMinValidDocument(OperationContext* opCtx) override; @@ -64,8 +69,8 @@ public: void setMinValid(OperationContext* opCtx, const OpTime& minValid) override; void setMinValidToAtLeast(OperationContext* opCtx, const OpTime& minValid) override; - void setOplogDeleteFromPoint(OperationContext* opCtx, const Timestamp& timestamp) override; - Timestamp getOplogDeleteFromPoint(OperationContext* opCtx) const override; + void setOplogTruncateAfterPoint(OperationContext* opCtx, const Timestamp& timestamp) override; + Timestamp getOplogTruncateAfterPoint(OperationContext* opCtx) const override; void setAppliedThrough(OperationContext* opCtx, const OpTime& optime) override; OpTime getAppliedThrough(OperationContext* opCtx) const override; @@ -85,8 +90,25 @@ private: */ void _updateMinValidDocument(OperationContext* opCtx, const BSONObj& updateSpec); + /** + * Reads the OplogTruncateAfterPoint document from disk. + * Returns boost::none if not present. + */ + boost::optional<OplogTruncateAfterPointDocument> _getOplogTruncateAfterPointDocument( + OperationContext* opCtx) const; + + /** + * Upserts the OplogTruncateAfterPoint document according to the provided update spec. + * If the collection does not exist, it is created. If the document does not exist, + * it is upserted. + * + * This fasserts on failure. + */ + void _upsertOplogTruncateAfterPointDocument(OperationContext* opCtx, const BSONObj& updateSpec); + StorageInterface* _storageInterface; const NamespaceString _minValidNss; + const NamespaceString _oplogTruncateAfterPointNss; }; } // namespace repl diff --git a/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp b/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp index 179a792dd60..1306811fce4 100644 --- a/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp +++ b/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp @@ -66,16 +66,35 @@ NamespaceString makeNamespace(const T& t, const std::string& suffix = "") { * Returns min valid document. */ BSONObj getMinValidDocument(OperationContext* opCtx, const NamespaceString& minValidNss) { - MONGO_WRITE_CONFLICT_RETRY_LOOP_BEGIN { + return writeConflictRetry(opCtx, "getMinValidDocument", minValidNss.ns(), [opCtx, minValidNss] { Lock::DBLock dblk(opCtx, minValidNss.db(), MODE_IS); Lock::CollectionLock lk(opCtx->lockState(), minValidNss.ns(), MODE_IS); BSONObj mv; if (Helpers::getSingleton(opCtx, minValidNss.ns().c_str(), mv)) { return mv; } - } - MONGO_WRITE_CONFLICT_RETRY_LOOP_END(opCtx, "getMinValidDocument", minValidNss.ns()); - return BSONObj(); + return mv; + }); +} + +/** + * Returns oplog truncate after point document. + */ +BSONObj getOplogTruncateAfterPointDocument(OperationContext* opCtx, + const NamespaceString& oplogTruncateAfterPointNss) { + return writeConflictRetry( + opCtx, + "getOplogTruncateAfterPointDocument", + oplogTruncateAfterPointNss.ns(), + [opCtx, oplogTruncateAfterPointNss] { + Lock::DBLock dblk(opCtx, oplogTruncateAfterPointNss.db(), MODE_IS); + Lock::CollectionLock lk(opCtx->lockState(), oplogTruncateAfterPointNss.ns(), MODE_IS); + BSONObj mv; + if (Helpers::getSingleton(opCtx, oplogTruncateAfterPointNss.ns().c_str(), mv)) { + return mv; + } + return mv; + }); } class ReplicationConsistencyMarkersTest : public ServiceContextMongoDTest { @@ -126,89 +145,101 @@ bool RecoveryUnitWithDurabilityTracking::waitUntilDurable() { } TEST_F(ReplicationConsistencyMarkersTest, InitialSyncFlag) { - auto nss = makeNamespace(_agent); + auto minValidNss = makeNamespace(_agent, "minValid"); + auto oplogTruncateAfterPointNss = makeNamespace(_agent, "oplogTruncateAfterPoint"); - ReplicationConsistencyMarkersImpl minValid(getStorageInterface(), nss); + ReplicationConsistencyMarkersImpl consistencyMarkers( + getStorageInterface(), minValidNss, oplogTruncateAfterPointNss); auto opCtx = getOperationContext(); - minValid.initializeMinValidDocument(opCtx); + consistencyMarkers.initializeMinValidDocument(opCtx); // Initial sync flag should be unset after initializing a new storage engine. - ASSERT_FALSE(minValid.getInitialSyncFlag(opCtx)); + ASSERT_FALSE(consistencyMarkers.getInitialSyncFlag(opCtx)); // Setting initial sync flag should affect getInitialSyncFlag() result. - minValid.setInitialSyncFlag(opCtx); - ASSERT_TRUE(minValid.getInitialSyncFlag(opCtx)); + consistencyMarkers.setInitialSyncFlag(opCtx); + ASSERT_TRUE(consistencyMarkers.getInitialSyncFlag(opCtx)); // Check min valid document using storage engine interface. - auto minValidDocument = getMinValidDocument(opCtx, nss); + auto minValidDocument = getMinValidDocument(opCtx, minValidNss); ASSERT_TRUE(minValidDocument.hasField(MinValidDocument::kInitialSyncFlagFieldName)); ASSERT_TRUE(minValidDocument.getBoolField(MinValidDocument::kInitialSyncFlagFieldName)); // Clearing initial sync flag should affect getInitialSyncFlag() result. - minValid.clearInitialSyncFlag(opCtx); - ASSERT_FALSE(minValid.getInitialSyncFlag(opCtx)); + consistencyMarkers.clearInitialSyncFlag(opCtx); + ASSERT_FALSE(consistencyMarkers.getInitialSyncFlag(opCtx)); } TEST_F(ReplicationConsistencyMarkersTest, GetMinValidAfterSettingInitialSyncFlagWorks) { - auto nss = makeNamespace(_agent); + auto minValidNss = makeNamespace(_agent, "minValid"); + auto oplogTruncateAfterPointNss = makeNamespace(_agent, "oplogTruncateAfterPoint"); - ReplicationConsistencyMarkersImpl minValid(getStorageInterface(), nss); + ReplicationConsistencyMarkersImpl consistencyMarkers( + getStorageInterface(), minValidNss, oplogTruncateAfterPointNss); auto opCtx = getOperationContext(); - minValid.initializeMinValidDocument(opCtx); + consistencyMarkers.initializeMinValidDocument(opCtx); // Initial sync flag should be unset after initializing a new storage engine. - ASSERT_FALSE(minValid.getInitialSyncFlag(opCtx)); + ASSERT_FALSE(consistencyMarkers.getInitialSyncFlag(opCtx)); // Setting initial sync flag should affect getInitialSyncFlag() result. - minValid.setInitialSyncFlag(opCtx); - ASSERT_TRUE(minValid.getInitialSyncFlag(opCtx)); + consistencyMarkers.setInitialSyncFlag(opCtx); + ASSERT_TRUE(consistencyMarkers.getInitialSyncFlag(opCtx)); - ASSERT(minValid.getMinValid(opCtx).isNull()); - ASSERT(minValid.getAppliedThrough(opCtx).isNull()); - ASSERT(minValid.getOplogDeleteFromPoint(opCtx).isNull()); + ASSERT(consistencyMarkers.getMinValid(opCtx).isNull()); + ASSERT(consistencyMarkers.getAppliedThrough(opCtx).isNull()); + ASSERT(consistencyMarkers.getOplogTruncateAfterPoint(opCtx).isNull()); } TEST_F(ReplicationConsistencyMarkersTest, ReplicationConsistencyMarkers) { - auto nss = makeNamespace(_agent); + auto minValidNss = makeNamespace(_agent, "minValid"); + auto oplogTruncateAfterPointNss = makeNamespace(_agent, "oplogTruncateAfterPoint"); - ReplicationConsistencyMarkersImpl minValid(getStorageInterface(), nss); + ReplicationConsistencyMarkersImpl consistencyMarkers( + getStorageInterface(), minValidNss, oplogTruncateAfterPointNss); auto opCtx = getOperationContext(); - minValid.initializeMinValidDocument(opCtx); + consistencyMarkers.initializeMinValidDocument(opCtx); // MinValid boundaries should all be null after initializing a new storage engine. - ASSERT(minValid.getMinValid(opCtx).isNull()); - ASSERT(minValid.getAppliedThrough(opCtx).isNull()); - ASSERT(minValid.getOplogDeleteFromPoint(opCtx).isNull()); + ASSERT(consistencyMarkers.getMinValid(opCtx).isNull()); + ASSERT(consistencyMarkers.getAppliedThrough(opCtx).isNull()); + ASSERT(consistencyMarkers.getOplogTruncateAfterPoint(opCtx).isNull()); // Setting min valid boundaries should affect getMinValid() result. OpTime startOpTime({Seconds(123), 0}, 1LL); OpTime endOpTime({Seconds(456), 0}, 1LL); - minValid.setAppliedThrough(opCtx, startOpTime); - minValid.setMinValid(opCtx, endOpTime); - minValid.setOplogDeleteFromPoint(opCtx, endOpTime.getTimestamp()); + consistencyMarkers.setAppliedThrough(opCtx, startOpTime); + consistencyMarkers.setMinValid(opCtx, endOpTime); + consistencyMarkers.setOplogTruncateAfterPoint(opCtx, endOpTime.getTimestamp()); - ASSERT_EQ(minValid.getAppliedThrough(opCtx), startOpTime); - ASSERT_EQ(minValid.getMinValid(opCtx), endOpTime); - ASSERT_EQ(minValid.getOplogDeleteFromPoint(opCtx), endOpTime.getTimestamp()); + ASSERT_EQ(consistencyMarkers.getAppliedThrough(opCtx), startOpTime); + ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), endOpTime); + ASSERT_EQ(consistencyMarkers.getOplogTruncateAfterPoint(opCtx), endOpTime.getTimestamp()); // setMinValid always changes minValid, but setMinValidToAtLeast only does if higher. - minValid.setMinValid(opCtx, startOpTime); // Forcibly lower it. - ASSERT_EQ(minValid.getMinValid(opCtx), startOpTime); - minValid.setMinValidToAtLeast(opCtx, endOpTime); // Higher than current (sets it). - ASSERT_EQ(minValid.getMinValid(opCtx), endOpTime); - minValid.setMinValidToAtLeast(opCtx, startOpTime); // Lower than current (no-op). - ASSERT_EQ(minValid.getMinValid(opCtx), endOpTime); + consistencyMarkers.setMinValid(opCtx, startOpTime); // Forcibly lower it. + ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), startOpTime); + consistencyMarkers.setMinValidToAtLeast(opCtx, endOpTime); // Higher than current (sets it). + ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), endOpTime); + consistencyMarkers.setMinValidToAtLeast(opCtx, startOpTime); // Lower than current (no-op). + ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), endOpTime); // Check min valid document using storage engine interface. - auto minValidDocument = getMinValidDocument(opCtx, nss); + auto minValidDocument = getMinValidDocument(opCtx, minValidNss); ASSERT_TRUE(minValidDocument.hasField(MinValidDocument::kAppliedThroughFieldName)); ASSERT_TRUE(minValidDocument[MinValidDocument::kAppliedThroughFieldName].isABSONObj()); ASSERT_EQUALS(startOpTime, unittest::assertGet(OpTime::parseFromOplogEntry( minValidDocument[MinValidDocument::kAppliedThroughFieldName].Obj()))); ASSERT_EQUALS(endOpTime, unittest::assertGet(OpTime::parseFromOplogEntry(minValidDocument))); + + // Check oplog truncate after point document. + auto oplogTruncateAfterPointDocument = + getOplogTruncateAfterPointDocument(opCtx, oplogTruncateAfterPointNss); ASSERT_EQUALS(endOpTime.getTimestamp(), - minValidDocument[MinValidDocument::kOplogDeleteFromPointFieldName].timestamp()); + oplogTruncateAfterPointDocument + [OplogTruncateAfterPointDocument::kOplogTruncateAfterPointFieldName] + .timestamp()); // Recovery unit will be owned by "opCtx". RecoveryUnitWithDurabilityTracking* recoveryUnit = new RecoveryUnitWithDurabilityTracking(); @@ -216,11 +247,42 @@ TEST_F(ReplicationConsistencyMarkersTest, ReplicationConsistencyMarkers) { // Set min valid without waiting for the changes to be durable. OpTime endOpTime2({Seconds(789), 0}, 1LL); - minValid.setMinValid(opCtx, endOpTime2); - minValid.setAppliedThrough(opCtx, {}); - ASSERT_EQUALS(minValid.getAppliedThrough(opCtx), OpTime()); - ASSERT_EQUALS(minValid.getMinValid(opCtx), endOpTime2); + consistencyMarkers.setMinValid(opCtx, endOpTime2); + consistencyMarkers.setAppliedThrough(opCtx, {}); + ASSERT_EQUALS(consistencyMarkers.getAppliedThrough(opCtx), OpTime()); + ASSERT_EQUALS(consistencyMarkers.getMinValid(opCtx), endOpTime2); ASSERT_FALSE(recoveryUnit->waitUntilDurableCalled); } +TEST_F(ReplicationConsistencyMarkersTest, OplogTruncateAfterPointUpgrade) { + auto minValidNss = makeNamespace(_agent, "minValid"); + auto oplogTruncateAfterPointNss = makeNamespace(_agent, "oplogTruncateAfterPoint"); + + ReplicationConsistencyMarkersImpl consistencyMarkers( + getStorageInterface(), minValidNss, oplogTruncateAfterPointNss); + auto opCtx = getOperationContext(); + Timestamp time1(Seconds(123), 0); + Timestamp time2(Seconds(456), 0); + OpTime minValidTime(Timestamp(789), 2); + + // Insert the old oplogDeleteFromPoint and make sure that we don't read it and do not have + // trouble reading and updating the new one. + ASSERT_OK(getStorageInterface()->createCollection(opCtx, minValidNss, {})); + ASSERT_OK(getStorageInterface()->insertDocument( + opCtx, + minValidNss, + BSON("_id" << OID::gen() << MinValidDocument::kMinValidTimestampFieldName + << minValidTime.getTimestamp() + << MinValidDocument::kMinValidTermFieldName + << minValidTime.getTerm() + << ReplicationConsistencyMarkersImpl::kOldOplogDeleteFromPointFieldName + << time1))); + consistencyMarkers.initializeMinValidDocument(opCtx); + ASSERT(consistencyMarkers.getOplogTruncateAfterPoint(opCtx).isNull()); + ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), minValidTime); + + consistencyMarkers.setOplogTruncateAfterPoint(opCtx, time2); + ASSERT_EQ(consistencyMarkers.getOplogTruncateAfterPoint(opCtx), time2); +} + } // namespace diff --git a/src/mongo/db/repl/replication_consistency_markers_mock.cpp b/src/mongo/db/repl/replication_consistency_markers_mock.cpp index e1c7afc0794..b2df7bc5553 100644 --- a/src/mongo/db/repl/replication_consistency_markers_mock.cpp +++ b/src/mongo/db/repl/replication_consistency_markers_mock.cpp @@ -42,7 +42,7 @@ void ReplicationConsistencyMarkersMock::initializeMinValidDocument(OperationCont { stdx::lock_guard<stdx::mutex> lock(_minValidBoundariesMutex); _minValid = {}; - _oplogDeleteFromPoint = {}; + _oplogTruncateAfterPoint = {}; _appliedThrough = {}; } } @@ -79,16 +79,16 @@ void ReplicationConsistencyMarkersMock::setMinValidToAtLeast(OperationContext* o _minValid = std::max(_minValid, minValid); } -void ReplicationConsistencyMarkersMock::setOplogDeleteFromPoint(OperationContext* opCtx, - const Timestamp& timestamp) { +void ReplicationConsistencyMarkersMock::setOplogTruncateAfterPoint(OperationContext* opCtx, + const Timestamp& timestamp) { stdx::lock_guard<stdx::mutex> lock(_minValidBoundariesMutex); - _oplogDeleteFromPoint = timestamp; + _oplogTruncateAfterPoint = timestamp; } -Timestamp ReplicationConsistencyMarkersMock::getOplogDeleteFromPoint( +Timestamp ReplicationConsistencyMarkersMock::getOplogTruncateAfterPoint( OperationContext* opCtx) const { stdx::lock_guard<stdx::mutex> lock(_minValidBoundariesMutex); - return _oplogDeleteFromPoint; + return _oplogTruncateAfterPoint; } void ReplicationConsistencyMarkersMock::setAppliedThrough(OperationContext* opCtx, diff --git a/src/mongo/db/repl/replication_consistency_markers_mock.h b/src/mongo/db/repl/replication_consistency_markers_mock.h index b66bc03589c..75d820d11bc 100644 --- a/src/mongo/db/repl/replication_consistency_markers_mock.h +++ b/src/mongo/db/repl/replication_consistency_markers_mock.h @@ -60,8 +60,8 @@ public: void setMinValid(OperationContext* opCtx, const OpTime& minValid) override; void setMinValidToAtLeast(OperationContext* opCtx, const OpTime& minValid) override; - void setOplogDeleteFromPoint(OperationContext* opCtx, const Timestamp& timestamp) override; - Timestamp getOplogDeleteFromPoint(OperationContext* opCtx) const override; + void setOplogTruncateAfterPoint(OperationContext* opCtx, const Timestamp& timestamp) override; + Timestamp getOplogTruncateAfterPoint(OperationContext* opCtx) const override; void setAppliedThrough(OperationContext* opCtx, const OpTime& optime) override; OpTime getAppliedThrough(OperationContext* opCtx) const override; @@ -73,7 +73,7 @@ private: mutable stdx::mutex _minValidBoundariesMutex; OpTime _appliedThrough; OpTime _minValid; - Timestamp _oplogDeleteFromPoint; + Timestamp _oplogTruncateAfterPoint; }; } // namespace repl diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index c35fa20b046..e92921ab55a 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -336,7 +336,7 @@ void ReplicationCoordinatorExternalStateImpl::shutdown(OperationContext* opCtx) // Perform additional shutdown steps below that must be done outside _threadMutex. - if (_replicationProcess->getConsistencyMarkers()->getOplogDeleteFromPoint(opCtx).isNull() && + if (_replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx).isNull() && loadLastOpTime(opCtx) == _replicationProcess->getConsistencyMarkers()->getAppliedThrough(opCtx)) { // Clear the appliedThrough marker to indicate we are consistent with the top of the @@ -415,7 +415,7 @@ OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationC // Clear the appliedThrough marker so on startup we'll use the top of the oplog. This must be // done before we add anything to our oplog. invariant( - _replicationProcess->getConsistencyMarkers()->getOplogDeleteFromPoint(opCtx).isNull()); + _replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx).isNull()); _replicationProcess->getConsistencyMarkers()->setAppliedThrough(opCtx, {}); if (isV1ElectionProtocol) { @@ -569,28 +569,28 @@ void ReplicationCoordinatorExternalStateImpl::cleanUpLastApplyBatch(OperationCon return; // Initial Sync will take over so no cleanup is needed. } - const auto deleteFromPoint = - _replicationProcess->getConsistencyMarkers()->getOplogDeleteFromPoint(opCtx); + const auto truncateAfterPoint = + _replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx); const auto appliedThrough = _replicationProcess->getConsistencyMarkers()->getAppliedThrough(opCtx); - const bool needToDeleteEndOfOplog = !deleteFromPoint.isNull() && - // This version should never have a non-null deleteFromPoint with a null appliedThrough. + const bool needToDeleteEndOfOplog = !truncateAfterPoint.isNull() && + // This version should never have a non-null truncateAfterPoint with a null appliedThrough. // This scenario means that we downgraded after unclean shutdown, then the downgraded node // deleted the ragged end of our oplog, then did a clean shutdown. !appliedThrough.isNull() && - // Similarly we should never have an appliedThrough higher than the deleteFromPoint. This + // Similarly we should never have an appliedThrough higher than the truncateAfterPoint. This // means that the downgraded node deleted our ragged end then applied ahead of our - // deleteFromPoint and then had an unclean shutdown before upgrading. We are ok with + // truncateAfterPoint and then had an unclean shutdown before upgrading. We are ok with // applying these ops because older versions wrote to the oplog from a single thread so we // know they are in order. - !(appliedThrough.getTimestamp() >= deleteFromPoint); + !(appliedThrough.getTimestamp() >= truncateAfterPoint); if (needToDeleteEndOfOplog) { - log() << "Removing unapplied entries starting at: " << deleteFromPoint; - truncateOplogTo(opCtx, deleteFromPoint); + log() << "Removing unapplied entries starting at: " << truncateAfterPoint; + truncateOplogTo(opCtx, truncateAfterPoint); } - _replicationProcess->getConsistencyMarkers()->setOplogDeleteFromPoint( - opCtx, {}); // clear the deleteFromPoint + _replicationProcess->getConsistencyMarkers()->setOplogTruncateAfterPoint( + opCtx, {}); // clear the truncateAfterPoint if (appliedThrough.isNull()) { // No follow-up work to do. diff --git a/src/mongo/db/repl/sync_tail.cpp b/src/mongo/db/repl/sync_tail.cpp index 2c4106db227..c3d5bd30efb 100644 --- a/src/mongo/db/repl/sync_tail.cpp +++ b/src/mongo/db/repl/sync_tail.cpp @@ -1277,7 +1277,7 @@ StatusWith<OpTime> multiApply(OperationContext* opCtx, ON_BLOCK_EXIT([&] { workerPool->join(); }); // Write batch of ops into oplog. - consistencyMarkers->setOplogDeleteFromPoint(opCtx, ops.front().getTimestamp()); + consistencyMarkers->setOplogTruncateAfterPoint(opCtx, ops.front().getTimestamp()); scheduleWritesToOplog(opCtx, workerPool, ops); fillWriterVectors(opCtx, &ops, &writerVectors); @@ -1285,7 +1285,7 @@ StatusWith<OpTime> multiApply(OperationContext* opCtx, workerPool->join(); // Reset consistency markers in case the node fails while applying ops. - consistencyMarkers->setOplogDeleteFromPoint(opCtx, Timestamp()); + consistencyMarkers->setOplogTruncateAfterPoint(opCtx, Timestamp()); consistencyMarkers->setMinValidToAtLeast(opCtx, ops.back().getOpTime()); applyOps(writerVectors, workerPool, applyOperation, &statusVector); |