summary refs log tree commit diff
path: root/src/mongo/db/repl
diff options
context:
space:
mode:
author Judah Schvimer <judah@mongodb.com> 2017-08-14 14:18:44 -0400
committer Judah Schvimer <judah@mongodb.com> 2017-08-14 14:18:44 -0400
commit 530c50d569e72170007ba0061a43758c873bab3b (patch)
tree 096565144d0a1688c445624a355c916e68ea2bd9 /src/mongo/db/repl
parent 10bff4daf0473b8970a2b7b7e34d18804655ba34 (diff)
download mongo-530c50d569e72170007ba0061a43758c873bab3b.tar.gz
SERVER-29894 fall back on old oplog delete from point in FCV 3.4
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r-- src/mongo/db/repl/replication_consistency_markers.h 25
-rw-r--r-- src/mongo/db/repl/replication_consistency_markers.idl 8
-rw-r--r-- src/mongo/db/repl/replication_consistency_markers_impl.cpp 37
-rw-r--r-- src/mongo/db/repl/replication_consistency_markers_impl.h 11
-rw-r--r-- src/mongo/db/repl/replication_consistency_markers_impl_test.cpp 44
-rw-r--r-- src/mongo/db/repl/replication_consistency_markers_mock.cpp 3
-rw-r--r-- src/mongo/db/repl/replication_consistency_markers_mock.h 2
-rw-r--r-- src/mongo/db/repl/replication_recovery.cpp 3
8 files changed, 115 insertions, 18 deletions
diff --git a/src/mongo/db/repl/replication_consistency_markers.h b/src/mongo/db/repl/replication_consistency_markers.h
index ddd46cbff6b..96ea46b68c7 100644
--- a/src/mongo/db/repl/replication_consistency_markers.h
+++ b/src/mongo/db/repl/replication_consistency_markers.h
@@ -59,6 +59,8 @@ class StorageInterface;
* ts: <Timestamp>,
* t: <long long>
* }, // field for 'appliedThrough'
+ * oplogDeleteFromPoint: <Timestamp>, // only exists on unclean upgrade
+ * // TODO (SERVER-30556): Remove after 3.6
* }
*
* The oplogTruncateAfterPoint document, in 'local.replset.oplogTruncateAfterPoint', is used to
@@ -140,21 +142,34 @@ public:
*/
virtual void setMinValidToAtLeast(OperationContext* opCtx, const OpTime& minValid) = 0;
- // -------- Oplog Delete From Point ----------
+ // -------- Oplog Truncate After Point ----------
/**
- * The oplog delete from point is set to the beginning of a batch of oplog entries before
+ * The oplog truncate after point is set to the beginning of a batch of oplog entries before
* the oplog entries are written into the oplog, and reset before we begin applying the batch.
- * On startup all oplog entries with a value >= the oplog delete from point should be deleted.
- * We write operations to the oplog in parallel so if we crash mid-batch there could be holes
- * in the oplog. Deleting them at startup keeps us consistent.
+ * On startup all oplog entries with a value >= the oplog truncate after point should be
+ * deleted. We write operations to the oplog in parallel so if we crash mid-batch there could
+ * be holes in the oplog. Deleting them at startup keeps us consistent.
*
* If null, no documents should be deleted.
+ *
+ * If we are in feature compatibility version 3.4 and there is no oplog truncate after point
+ * document, we fall back on the old oplog delete from point field in the minValid
+ * collection.
*/
virtual void setOplogTruncateAfterPoint(OperationContext* opCtx,
const Timestamp& timestamp) = 0;
virtual Timestamp getOplogTruncateAfterPoint(OperationContext* opCtx) const = 0;
+ /**
+ * The oplog delete from point may still exist on upgrade from an unclean shutdown. This
+ * function removes the field so it's gone after 3.6.
+ *
+ * TODO (SERVER-30556): Delete this function in 3.8 because the old oplog delete from point
+ * cannot exist.
+ */
+ virtual void removeOldOplogDeleteFromPointField(OperationContext* opCtx) = 0;
+
// -------- Applied Through ----------
/**
diff --git a/src/mongo/db/repl/replication_consistency_markers.idl b/src/mongo/db/repl/replication_consistency_markers.idl
index 4925845178d..8da24a587a6 100644
--- a/src/mongo/db/repl/replication_consistency_markers.idl
+++ b/src/mongo/db/repl/replication_consistency_markers.idl
@@ -50,6 +50,14 @@ structs:
type: optime
optional: true # This field is unset when we want to mark that we are consistent at the top of the oplog
description: "The OpTime of the last oplog entry we applied"
+ # TODO (SERVER-30556): Delete this field since it cannot exist after 3.6.
+ oplogDeleteFromPoint:
+ cpp_name: oldOplogDeleteFromPoint
+ type: timestamp
+ optional: true # This field only exists on 3.4 upgrade
+ description: "The timestamp of the first oplog entry in a batch when we are writing
+ oplog entries to the oplog after which the oplog may be inconsistent.
+ This field only exists on 3.4 upgrade."
doingInitialSync:
cpp_name: initialSyncFlag
type: bool
diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.cpp b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
index 0a61559fca1..c4be7496074 100644
--- a/src/mongo/db/repl/replication_consistency_markers_impl.cpp
+++ b/src/mongo/db/repl/replication_consistency_markers_impl.cpp
@@ -45,7 +45,6 @@ namespace repl {
constexpr StringData ReplicationConsistencyMarkersImpl::kDefaultMinValidNamespace;
constexpr StringData ReplicationConsistencyMarkersImpl::kDefaultOplogTruncateAfterPointNamespace;
constexpr StringData ReplicationConsistencyMarkersImpl::kDefaultCheckpointTimestampNamespace;
-constexpr StringData ReplicationConsistencyMarkersImpl::kOldOplogDeleteFromPointFieldName;
namespace {
const BSONObj kInitialSyncFlag(BSON(MinValidDocument::kInitialSyncFlagFieldName << true));
@@ -112,17 +111,12 @@ void ReplicationConsistencyMarkersImpl::initializeMinValidDocument(OperationCont
// This initializes the values of the required fields if they are not already set.
// If one of the fields is already set, the $max will prefer the existing value since it
- // will always be greater than the provided ones. We unset the old 'oplogDeleteFromPoint'
- // field so that we can remove it from the IDL struct. This is required because servers
- // upgrading from 3.4 may have created an 'oplogDeleteFromPoint' field already. The field
- // is guaranteed to be empty on clean shutdown and thus on upgrade, but may still exist.
+ // will always be greater than the provided ones.
_updateMinValidDocument(opCtx,
BSON("$max" << BSON(MinValidDocument::kMinValidTimestampFieldName
<< Timestamp()
<< MinValidDocument::kMinValidTermFieldName
- << OpTime::kUninitializedTerm)
- << "$unset"
- << BSON(kOldOplogDeleteFromPointFieldName << 1)));
+ << OpTime::kUninitializedTerm)));
}
bool ReplicationConsistencyMarkersImpl::getInitialSyncFlag(OperationContext* opCtx) const {
@@ -198,6 +192,12 @@ void ReplicationConsistencyMarkersImpl::setMinValidToAtLeast(OperationContext* o
<< minValid.getTerm())));
}
+void ReplicationConsistencyMarkersImpl::removeOldOplogDeleteFromPointField(
+ OperationContext* opCtx) {
+ _updateMinValidDocument(
+ opCtx, BSON("$unset" << BSON(MinValidDocument::kOldOplogDeleteFromPointFieldName << 1)));
+}
+
void ReplicationConsistencyMarkersImpl::setAppliedThrough(OperationContext* opCtx,
const OpTime& optime) {
LOG(3) << "setting appliedThrough to: " << optime.toString() << "(" << optime.toBSON() << ")";
@@ -277,6 +277,12 @@ Timestamp ReplicationConsistencyMarkersImpl::getOplogTruncateAfterPoint(
OperationContext* opCtx) const {
auto doc = _getOplogTruncateAfterPointDocument(opCtx);
if (!doc) {
+ if (serverGlobalParams.featureCompatibility.version.load() ==
+ ServerGlobalParams::FeatureCompatibility::Version::k34) {
+ LOG(3) << "Falling back on old oplog delete from point because there is no oplog "
+ "truncate after point and we are in FCV 3.4.";
+ return _getOldOplogDeleteFromPoint(opCtx);
+ }
LOG(3) << "Returning empty oplog truncate after point since document did not exist";
return {};
}
@@ -287,6 +293,21 @@ Timestamp ReplicationConsistencyMarkersImpl::getOplogTruncateAfterPoint(
return out;
}
+Timestamp ReplicationConsistencyMarkersImpl::_getOldOplogDeleteFromPoint(
+ OperationContext* opCtx) const {
+ auto doc = _getMinValidDocument(opCtx);
+ invariant(doc); // Initialized at startup so it should never be missing.
+
+ auto oplogDeleteFromPoint = doc->getOldOplogDeleteFromPoint();
+ if (!oplogDeleteFromPoint) {
+ LOG(3) << "No oplogDeleteFromPoint timestamp set, returning empty timestamp.";
+ return {};
+ }
+
+ LOG(3) << "returning oplog delete from point: " << oplogDeleteFromPoint.get();
+ return oplogDeleteFromPoint.get();
+}
+
void ReplicationConsistencyMarkersImpl::_upsertCheckpointTimestampDocument(
OperationContext* opCtx, const BSONObj& updateSpec) {
auto status = _storageInterface->upsertById(
diff --git a/src/mongo/db/repl/replication_consistency_markers_impl.h b/src/mongo/db/repl/replication_consistency_markers_impl.h
index d6f298f0ba1..7bf70fa5c0b 100644
--- a/src/mongo/db/repl/replication_consistency_markers_impl.h
+++ b/src/mongo/db/repl/replication_consistency_markers_impl.h
@@ -53,8 +53,6 @@ public:
"local.replset.oplogTruncateAfterPoint"_sd;
static constexpr StringData kDefaultCheckpointTimestampNamespace =
"local.replset.checkpointTimestamp"_sd;
- // TODO: Remove this constant and its usage in minValid initialization in 3.8.
- static constexpr StringData kOldOplogDeleteFromPointFieldName = "oplogDeleteFromPoint"_sd;
explicit ReplicationConsistencyMarkersImpl(StorageInterface* storageInterface);
ReplicationConsistencyMarkersImpl(StorageInterface* storageInterface,
@@ -75,6 +73,8 @@ public:
void setOplogTruncateAfterPoint(OperationContext* opCtx, const Timestamp& timestamp) override;
Timestamp getOplogTruncateAfterPoint(OperationContext* opCtx) const override;
+ void removeOldOplogDeleteFromPointField(OperationContext* opCtx) override;
+
void setAppliedThrough(OperationContext* opCtx, const OpTime& optime) override;
OpTime getAppliedThrough(OperationContext* opCtx) const override;
@@ -104,6 +104,13 @@ private:
OperationContext* opCtx) const;
/**
+ * Returns the old oplog delete from point from the minValid document. Returns an empty
+ * timestamp if the field does not exist. This is used to fall back in FCV 3.4 if the oplog
+ * truncate after point document does not exist.
+ */
+ Timestamp _getOldOplogDeleteFromPoint(OperationContext* opCtx) const;
+
+ /**
* Reads the CheckpointTimestamp document from disk.
* Returns boost::none if not present.
*/
diff --git a/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp b/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp
index 8a0ba53e6ce..03885151087 100644
--- a/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp
+++ b/src/mongo/db/repl/replication_consistency_markers_impl_test.cpp
@@ -300,8 +300,7 @@ TEST_F(ReplicationConsistencyMarkersTest, OplogTruncateAfterPointUpgrade) {
Timestamp time2(Seconds(456), 0);
OpTime minValidTime(Timestamp(789), 2);
- // Insert the old oplogDeleteFromPoint and make sure that we don't read it and do not have
- // trouble reading and updating the new one.
+ // Insert the old oplogDeleteFromPoint and make sure getOplogTruncateAfterPoint() returns it.
ASSERT_OK(getStorageInterface()->createCollection(opCtx, minValidNss, {}));
ASSERT_OK(getStorageInterface()->insertDocument(
opCtx,
@@ -310,12 +309,51 @@ TEST_F(ReplicationConsistencyMarkersTest, OplogTruncateAfterPointUpgrade) {
<< minValidTime.getTimestamp()
<< MinValidDocument::kMinValidTermFieldName
<< minValidTime.getTerm()
- << ReplicationConsistencyMarkersImpl::kOldOplogDeleteFromPointFieldName
+ << MinValidDocument::kOldOplogDeleteFromPointFieldName
<< time1)));
consistencyMarkers.initializeMinValidDocument(opCtx);
+
+ // Set the feature compatibility version to 3.6.
+ serverGlobalParams.featureCompatibility.version.store(
+ ServerGlobalParams::FeatureCompatibility::Version::k36);
+
+ // Check that we see no oplog truncate after point in FCV 3.6.
+ ASSERT(consistencyMarkers.getOplogTruncateAfterPoint(opCtx).isNull());
+ ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), minValidTime);
+
+ // Set the feature compatibility version to 3.4.
+ serverGlobalParams.featureCompatibility.version.store(
+ ServerGlobalParams::FeatureCompatibility::Version::k34);
+
+ // Check that we see the old oplog delete from point in FCV 3.4.
+ ASSERT_EQ(consistencyMarkers.getOplogTruncateAfterPoint(opCtx), time1);
+ ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), minValidTime);
+
+ // Check that the minValid document has the oplog delete from point.
+ auto minValidDocument = getMinValidDocument(opCtx, minValidNss);
+ ASSERT_TRUE(minValidDocument.hasField(MinValidDocument::kOldOplogDeleteFromPointFieldName));
+
+ consistencyMarkers.removeOldOplogDeleteFromPointField(opCtx);
+
+ // Check that the minValid document does not have the oplog delete from point.
+ minValidDocument = getMinValidDocument(opCtx, minValidNss);
+ ASSERT_FALSE(minValidDocument.hasField(MinValidDocument::kOldOplogDeleteFromPointFieldName));
+
+ // Check that, after removing the old oplog delete from point, we do not see the oplog
+ // truncate after point in FCV 3.4.
+ ASSERT(consistencyMarkers.getOplogTruncateAfterPoint(opCtx).isNull());
+ ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), minValidTime);
+
+ // Set the feature compatibility version to 3.6.
+ serverGlobalParams.featureCompatibility.version.store(
+ ServerGlobalParams::FeatureCompatibility::Version::k36);
+
+ // Check that, after removing the old oplog delete from point, we do not see the oplog
+ // truncate after point in FCV 3.6.
ASSERT(consistencyMarkers.getOplogTruncateAfterPoint(opCtx).isNull());
ASSERT_EQ(consistencyMarkers.getMinValid(opCtx), minValidTime);
+ // Check that we can set the oplog truncate after point.
consistencyMarkers.setOplogTruncateAfterPoint(opCtx, time2);
ASSERT_EQ(consistencyMarkers.getOplogTruncateAfterPoint(opCtx), time2);
}
diff --git a/src/mongo/db/repl/replication_consistency_markers_mock.cpp b/src/mongo/db/repl/replication_consistency_markers_mock.cpp
index ec533e1558f..9a33b0ea902 100644
--- a/src/mongo/db/repl/replication_consistency_markers_mock.cpp
+++ b/src/mongo/db/repl/replication_consistency_markers_mock.cpp
@@ -91,6 +91,9 @@ Timestamp ReplicationConsistencyMarkersMock::getOplogTruncateAfterPoint(
return _oplogTruncateAfterPoint;
}
+void ReplicationConsistencyMarkersMock::removeOldOplogDeleteFromPointField(
+ OperationContext* opCtx) {}
+
void ReplicationConsistencyMarkersMock::setAppliedThrough(OperationContext* opCtx,
const OpTime& optime) {
stdx::lock_guard<stdx::mutex> lock(_minValidBoundariesMutex);
diff --git a/src/mongo/db/repl/replication_consistency_markers_mock.h b/src/mongo/db/repl/replication_consistency_markers_mock.h
index cf3745a529e..45497877cea 100644
--- a/src/mongo/db/repl/replication_consistency_markers_mock.h
+++ b/src/mongo/db/repl/replication_consistency_markers_mock.h
@@ -63,6 +63,8 @@ public:
void setOplogTruncateAfterPoint(OperationContext* opCtx, const Timestamp& timestamp) override;
Timestamp getOplogTruncateAfterPoint(OperationContext* opCtx) const override;
+ void removeOldOplogDeleteFromPointField(OperationContext* opCtx) override;
+
void setAppliedThrough(OperationContext* opCtx, const OpTime& optime) override;
OpTime getAppliedThrough(OperationContext* opCtx) const override;
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp
index b705bdc94ba..8042674d358 100644
--- a/src/mongo/db/repl/replication_recovery.cpp
+++ b/src/mongo/db/repl/replication_recovery.cpp
@@ -73,6 +73,9 @@ void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx) try {
}
_consistencyMarkers->setOplogTruncateAfterPoint(opCtx, {}); // clear the truncateAfterPoint
+ // TODO (SERVER-30556): Delete this line since the old oplog delete from point cannot exist.
+ _consistencyMarkers->removeOldOplogDeleteFromPointField(opCtx);
+
auto topOfOplogSW = _getLastAppliedOpTime(opCtx);
boost::optional<OpTime> topOfOplog = boost::none;
if (topOfOplogSW.getStatus() != ErrorCodes::CollectionIsEmpty &&