author    Gregory Wlodarek <gregory.wlodarek@mongodb.com>  2021-04-30 21:31:13 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>  2021-05-04 15:27:09 +0000
commit    680744a808cc4387a8d6fb7c2d22ebc448a5f97c (patch)
tree      b6214c73be07ec6c46252e3f392fea984989e208
parent    8ed8f5bbd665b5601c08e08094b01ade51cc4576 (diff)
SERVER-56344 Test that secondaries with capped collections upgrading from FCV 4.4 to FCV 5.0 delete all documents over the cap after stepping up
-rw-r--r--  jstests/noPassthrough/capped_collections_fcv_44_to_50_eventually_consistent.js  102
-rw-r--r--  src/mongo/db/catalog/collection_impl.cpp                                           6
-rw-r--r--  src/mongo/db/repl/oplog.cpp                                                        36
3 files changed, 136 insertions(+), 8 deletions(-)
diff --git a/jstests/noPassthrough/capped_collections_fcv_44_to_50_eventually_consistent.js b/jstests/noPassthrough/capped_collections_fcv_44_to_50_eventually_consistent.js
new file mode 100644
index 00000000000..dd5506b11df
--- /dev/null
+++ b/jstests/noPassthrough/capped_collections_fcv_44_to_50_eventually_consistent.js
@@ -0,0 +1,102 @@
+/**
+ * In FCV 4.4, each node is responsible for deleting the excess documents in capped collections.
+ * This implies that capped deletes may not be synchronized between nodes at times. When upgraded to
+ * FCV 5.0, the primary will generate delete oplog entries for capped collections. However, if any
+ * secondary was behind in deleting excess documents while in FCV 4.4, the primary would have no way
+ * of knowing and it would delete the first document it sees locally. Eventually, when secondaries
+ * step up and start deleting capped documents, they will first delete previously missed documents
+ * that may already be deleted on other nodes.
+ *
+ * This tests that secondaries with capped collections upgrading from FCV 4.4 to FCV 5.0 delete all
+ * documents over the cap after stepping up.
+ *
+ * @tags: [requires_replication]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+
+function assertContainsDocs(coll, docs) {
+ for (const doc of docs) {
+ assert.eq(1, coll.find(doc).itcount());
+ }
+}
+
+const rst = new ReplSetTest({
+ nodes: 2,
+ nodeOptions: {setParameter: {"oplogApplicationEnforcesSteadyStateConstraints": true}}
+});
+rst.startSet();
+rst.initiate();
+
+const primary = rst.getPrimary();
+const secondary = rst.getSecondary();
+
+assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: "4.4"}));
+
+const dbName = "test";
+const db = primary.getDB(dbName);
+const secDB = secondary.getDB(dbName);
+
+const collName = "eventually_consistent";
+assert.commandWorked(
+ db.createCollection(collName, {capped: true, size: 1024 * 1024 * 1024, max: 5}));
+
+const coll = db.getCollection(collName);
+const secColl = secDB.getCollection(collName);
+
+for (let i = 0; i < 5; i++) {
+ assert.commandWorked(coll.insert({x: i}));
+}
+
+// Pause capped deletes on the secondary.
+let fp = configureFailPoint(secondary, "skipCappedDeletes");
+
+for (let i = 5; i < 7; i++) {
+ assert.commandWorked(coll.insert({x: i}));
+}
+
+rst.awaitReplication();
+
+// Primary contains {x: 2} -> {x: 6}.
+assert.eq(5, coll.find({}).itcount());
+assertContainsDocs(coll, [{x: 2}, {x: 3}, {x: 4}, {x: 5}, {x: 6}]);
+
+// Secondary contains {x: 0} -> {x: 6}.
+assert.eq(7, secColl.find({}).itcount());
+assertContainsDocs(secColl, [{x: 0}, {x: 1}, {x: 2}, {x: 3}, {x: 4}, {x: 5}, {x: 6}]);
+
+assert.commandWorked(primary.adminCommand({setFeatureCompatibilityVersion: "5.0"}));
+fp.off();
+
+assert.commandWorked(coll.insert({x: 7}));
+rst.awaitReplication();
+
+// Primary contains {x: 3} -> {x: 7}.
+assert.eq(5, coll.find({}).itcount());
+assertContainsDocs(coll, [{x: 3}, {x: 4}, {x: 5}, {x: 6}, {x: 7}]);
+
+// Secondary contains {x: 0} -> {x: 1}, {x: 3} -> {x: 7}.
+assert.eq(7, secColl.find({}).itcount());
+assertContainsDocs(secColl, [{x: 0}, {x: 1}, {x: 3}, {x: 4}, {x: 5}, {x: 6}, {x: 7}]);
+
+rst.stepUp(secondary);
+rst.waitForState(secondary, ReplSetTest.State.PRIMARY);
+
+assert.commandWorked(secColl.insert({x: 8}));
+rst.awaitReplication();
+
+// Deleting already deleted documents on the old primary is a no-op.
+checkLog.containsJson(primary, 2170002);
+
+// Old primary, now secondary contains {x: 4} -> {x: 8}.
+assert.eq(5, coll.find({}).itcount());
+assertContainsDocs(coll, [{x: 4}, {x: 5}, {x: 6}, {x: 7}, {x: 8}]);
+
+// Old secondary, now primary contains {x: 4} -> {x: 8}.
+assert.eq(5, secColl.find({}).itcount());
+assertContainsDocs(secColl, [{x: 4}, {x: 5}, {x: 6}, {x: 7}, {x: 8}]);
+
+rst.stopSet();
+}());
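
The header comment of the new test above explains the mechanism being exercised: while the skipCappedDeletes fail point is active, a node stops trimming its own capped collections, which is how the test makes the secondary fall behind in FCV 4.4. As a minimal standalone sketch of that fail point in isolation (not part of this commit), the snippet below toggles it directly with the configureFailPoint admin command; the mongod is assumed to run with enableTestCommands=1, and the collection name and document shape are illustrative.

// Minimal sketch, not from this commit: assumes a mongod started with
// --setParameter enableTestCommands=1 and a shell connected to it as 'db'.
assert.commandWorked(
    db.adminCommand({configureFailPoint: "skipCappedDeletes", mode: "alwaysOn"}));

const sketchColl = db.getCollection("capped_sketch");
sketchColl.drop();
assert.commandWorked(db.createCollection("capped_sketch", {capped: true, size: 4096, max: 3}));

// With the fail point on, inserting past 'max' no longer removes the oldest documents.
for (let i = 0; i < 5; i++) {
    assert.commandWorked(sketchColl.insert({x: i}));
}
assert.eq(5, sketchColl.find({}).itcount());

// Restore normal behavior; later inserts should enforce the cap again.
assert.commandWorked(
    db.adminCommand({configureFailPoint: "skipCappedDeletes", mode: "off"}));
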
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index 587b2e30b80..5ea64a3efcb 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -110,6 +110,8 @@ MONGO_FAIL_POINT_DEFINE(allowSettingMalformedCollectionValidators);
// This fail point introduces corruption to documents during insert.
MONGO_FAIL_POINT_DEFINE(corruptDocumentOnInsert);
+MONGO_FAIL_POINT_DEFINE(skipCappedDeletes);
+
/**
* Checks the 'failCollectionInserts' fail point at the beginning of an insert operation to see if
the insert should fail. Returns Status::OK if the function should proceed with the insertion.
@@ -831,6 +833,10 @@ Status CollectionImpl::_insertDocuments(OperationContext* opCtx,
}
bool CollectionImpl::_cappedAndNeedDelete(OperationContext* opCtx) const {
+ if (MONGO_unlikely(skipCappedDeletes.shouldFail())) {
+ return false;
+ }
+
if (!isCapped()) {
return false;
}
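
The hunk above is where the fail point takes effect: when skipCappedDeletes is set, CollectionImpl::_cappedAndNeedDelete() reports that no capped delete is needed, so the node retains every document regardless of the cap. The test flips the fail point at runtime with configureFailPoint, but a fail point can also be enabled for the whole life of a process via the failpoint.<name> startup parameter. A hedged sketch of that variant follows; the node layout and variable names are chosen for illustration and are not taken from this commit.

// Sketch only: a two-node replica set whose second node never performs capped
// deletes because the fail point is enabled at startup (requires enableTestCommands).
const rstSketch = new ReplSetTest({
    nodes: [
        {},  // node 0: capped deletes behave normally
        {setParameter: {"failpoint.skipCappedDeletes": tojson({mode: "alwaysOn"})}},
    ],
});
rstSketch.startSet();
rstSketch.initiate();

// ... exercise capped collections as in the test above ...

rstSketch.stopSet();
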
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 996f3bbcc4d..b36fefd5aa2 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -1521,16 +1521,36 @@ Status applyOperation_inlock(OperationContext* opCtx,
"Applied a delete which did not delete anything in steady state "
"replication",
"op"_attr = redact(op.toBSONForLogging()));
- if (collection)
+
+ // In FCV 4.4, each node is responsible for deleting the excess documents in
+ // capped collections. This implies that capped deletes may not be synchronized
+ // between nodes at times. When upgraded to FCV 5.0, the primary will generate
+ // delete oplog entries for capped collections. However, if any secondary was
+ // behind in deleting excess documents while in FCV 4.4, the primary would have
+ // no way of knowing and it would delete the first document it sees locally.
+ // Eventually, when secondaries step up and start deleting capped documents,
+ // they will first delete previously missed documents that may already be
+ // deleted on other nodes. For this reason we skip returning NoSuchKey for
+ // capped collections when oplog application is enforcing steady state
+ // constraints.
+ bool isCapped = false;
+ if (collection) {
+ isCapped = collection->isCapped();
opCounters->gotDeleteWasEmpty();
- else
+ } else {
opCounters->gotDeleteFromMissingNamespace();
- // This error is fatal when we are enforcing steady state constraints.
- uassert(collection ? ErrorCodes::NoSuchKey : ErrorCodes::NamespaceNotFound,
- str::stream() << "Applied a delete which did not delete anything in "
- "steady state replication : "
- << redact(op.toBSONForLogging()),
- !oplogApplicationEnforcesSteadyStateConstraints);
+ }
+
+ if (!isCapped) {
+ // This error is fatal when we are enforcing steady state constraints for
+ // non-capped collections.
+ uassert(collection ? ErrorCodes::NoSuchKey : ErrorCodes::NamespaceNotFound,
+ str::stream()
+ << "Applied a delete which did not delete anything in "
+ "steady state replication : "
+ << redact(op.toBSONForLogging()),
+ !oplogApplicationEnforcesSteadyStateConstraints);
+ }
}
wuow.commit();
});
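
The oplog.cpp change above is what keeps the upgrade scenario survivable when steady state constraints are enforced: a delete that matches nothing during steady-state oplog application is normally fatal under oplogApplicationEnforcesSteadyStateConstraints, but for capped collections it is now only counted and logged with the existing warning (log id 2170002, which the test waits for on the old primary). A compressed sketch of checking that observable behavior is shown below; the parameter and log id come from the test above, while the rest is illustrative.

// Sketch only: with steady state constraints enforced, a node that applies a
// capped-collection delete for an already-missing document should log id 2170002
// rather than failing with NoSuchKey.
const replSet = new ReplSetTest({
    nodes: 2,
    nodeOptions: {setParameter: {"oplogApplicationEnforcesSteadyStateConstraints": true}},
});
replSet.startSet();
replSet.initiate();

// ... drive the FCV 4.4 -> 5.0 capped-collection divergence and step-up as in the test above ...

// The node applying the redundant deletes survives and reports them in its log.
checkLog.containsJson(replSet.getSecondary(), 2170002);

replSet.stopSet();
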