author     Daniel Gottlieb <daniel.gottlieb@mongodb.com>  2019-01-24 20:35:57 -0500
committer  Daniel Gottlieb <daniel.gottlieb@mongodb.com>  2019-01-24 20:35:57 -0500
commit     5f213f2d419d9549559281fef7d3704ad7614d12 (patch)
tree       6c37c0f4c12d53fcfd0faefa63807de46a280f05 /jstests
parent     3a1b11c960c6fc9bc5c94e4d5cd58f38c78ca55b (diff)
download   mongo-5f213f2d419d9549559281fef7d3704ad7614d12.tar.gz
SERVER-38499: Enforce oplog visibility at the MongoDB layer.
WiredTiger guards against transactions preparing with a timestamp earlier than the most recent reader; this guarantees that no reader may have seen the wrong version of a document.

The oplog is a special case. Because the oplog does not contain prepared updates, and oplog readers cannot read from other collections, it is valid to prepare behind an oplog reader's timestamp. However, WiredTiger is not aware that the oplog is special. When MongoDB uses WiredTiger `read_timestamp`s to enforce oplog visibility, there are cases (specifically, secondary oplog application) where an oplog reader can be in front of an impending prepare.

There were two strategies available for resolving this. The first is to artificially hold back which portion of the oplog is available to read until nothing can be prepared behind an oplog reader. The second strategy, which is what this patch does, is to have the MongoDB layer hide documents that are newer than the visibility point. The mechanism for calculating and discovering the visibility point is unchanged.
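The server-side enforcement itself lives in the C++ storage layer and is not part of this jstests-only diff, but the chosen strategy can be sketched in shell-style JavaScript. The sketch below is purely illustrative: readVisibleOplog, timestampCmp, and oplogVisibilityTs are hypothetical names standing in for the visibility point the server already tracks, not real server APIs.

// Conceptual sketch only, not the actual implementation: a reader capped at the
// oplog visibility point never sees entries written after that point, even though
// those entries may already be physically present in the oplog collection.
function timestampCmp(a, b) {
    // Illustrative comparator for {t: seconds, i: increment} style timestamps.
    if (a.t !== b.t) {
        return a.t < b.t ? -1 : 1;
    }
    return a.i === b.i ? 0 : (a.i < b.i ? -1 : 1);
}

function readVisibleOplog(oplogEntries, oplogVisibilityTs) {
    // Hide every document newer than the visibility point, as the commit message
    // describes; how the visibility point is calculated is unchanged by this patch.
    return oplogEntries.filter((entry) => timestampCmp(entry.ts, oplogVisibilityTs) <= 0);
}

// Hypothetical usage: only the first two entries are visible at {t: 5, i: 2}.
const entries = [{ts: {t: 5, i: 1}}, {ts: {t: 5, i: 2}}, {ts: {t: 6, i: 0}}];
assert.eq(2, readVisibleOplog(entries, {t: 5, i: 2}).length);

The test added below exercises exactly this boundary: concurrent writers keep appending to the oplog while a reader records the timestamps it sees, and a later scan over the same range must return the same sequence with no holes.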
Diffstat (limited to 'jstests')
-rw-r--r--  jstests/replsets/oplog_visibility.js | 119
1 file changed, 119 insertions, 0 deletions
diff --git a/jstests/replsets/oplog_visibility.js b/jstests/replsets/oplog_visibility.js
new file mode 100644
index 00000000000..7ab7877adfc
--- /dev/null
+++ b/jstests/replsets/oplog_visibility.js
@@ -0,0 +1,119 @@
+/**
+ * Test oplog visibility enforcement of primaries and secondaries. This test uses a client to read
+ * the oplog while there are concurrent writers. The client copies all the timestamps it sees and
+ * verifies a later scan over the range returns the same values.
+ *
+ * @tags: [requires_document_locking]
+ */
+(function() {
+ "use strict";
+
+ load("jstests/libs/parallelTester.js"); // for ScopedThread.
+
+ const replTest = new ReplSetTest({
+ name: "oplog_visibility",
+ nodes: [{}, {rsConfig: {priority: 0}}, {rsConfig: {priority: 0}}],
+ settings: {chainingAllowed: true}
+ });
+ replTest.startSet();
+ replTest.initiate();
+
+ jsTestLog("Enabling `sleepBeforeCommit` failpoint.");
+ for (let node of replTest.nodes) {
+ assert.commandWorked(node.adminCommand(
+ {configureFailPoint: "sleepBeforeCommit", mode: {activationProbability: 0.01}}));
+ }
+
+ jsTestLog("Starting concurrent writers.");
+ let stopLatch = new CountDownLatch(1);
+ let writers = [];
+ for (let idx = 0; idx < 2; ++idx) {
+ let coll = "coll_" + idx;
+ let writer = new ScopedThread(function(host, coll, stopLatch) {
+ const conn = new Mongo(host);
+ let id = 0;
+ while (stopLatch.getCount() > 0) {
+ conn.getDB("test").getCollection(coll).insert({_id: id});
+ id++;
+ }
+ }, replTest.getPrimary().host, coll, stopLatch);
+
+ writer.start();
+ writers.push(writer);
+ }
+
+ for (let node of replTest.nodes) {
+ let testOplog = function(node) {
+ let timestamps = [];
+
+ let local = node.getDB("local");
+ let oplogStart =
+ local.getCollection("oplog.rs").find().sort({$natural: -1}).limit(-1).next()["ts"];
+ jsTestLog({"Node": node.host, "StartTs": oplogStart});
+
+ while (timestamps.length < 1000) {
+ // Query with $gte to validate continuity. Do not add this first record to the
+ // recorded timestamps; its value was already recorded from the previous cursor.
+ let cursor = local.getCollection("oplog.rs")
+ .find({ts: {$gte: oplogStart}})
+ .sort({$natural: 1})
+ .tailable(true)
+ .batchSize(100);
+ assert(cursor.hasNext());
+ assert.eq(oplogStart, cursor.next()["ts"]);
+
+ // While this method wants to capture 1000 timestamps, the cursor has a batch size
+ // of 100 and this loop makes 200 iterations before getting a new cursor from a
+ // fresh query. The goal is to exercise getMores, which use different code paths
+ // for establishing their oplog reader transactions.
+ for (let num = 0; num < 200 && timestamps.length < 1000; ++num) {
+ try {
+ cursor.hasNext();
+ } catch (exc) {
+ break;
+ }
+ let ts = cursor.next()["ts"];
+ timestamps.push(ts);
+ oplogStart = ts;
+ }
+ }
+
+ jsTestLog({"Verifying": node.host, "StartTs": timestamps[0], "EndTs": timestamps[999]});
+ oplogStart = timestamps[0];
+ let cursor =
+ local.getCollection("oplog.rs").find({ts: {$gte: oplogStart}}).sort({$natural: 1});
+ for (let observedTsIdx = 0; observedTsIdx < timestamps.length; ++observedTsIdx) {
+ let observedTs = timestamps[observedTsIdx];
+ assert(cursor.hasNext());
+ let actualTs = cursor.next()["ts"];
+ assert.eq(actualTs, observedTs, function() {
+ let prev = null;
+ let next = null;
+ if (observedTsIdx > 0) {
+ prev = timestamps[observedTsIdx - 1];
+ }
+ if (observedTsIdx + 1 < timestamps.length) {
+ next = timestamps[observedTsIdx + 1];
+ }
+
+ return tojson({
+ "Missing": actualTs,
+ "ObservedIdx": observedTsIdx,
+ "PrevObserved": prev,
+ "NextObserved": next
+ });
+ });
+ }
+ };
+
+ jsTestLog({"Testing": node.host});
+ testOplog(node);
+ }
+ jsTestLog("Stopping writers.");
+ stopLatch.countDown();
+ writers.forEach((writer) => {
+ writer.join();
+ });
+
+ replTest.stopSet();
+})();