diff options
author | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2019-01-24 20:35:57 -0500 |
---|---|---|
committer | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2019-01-24 20:35:57 -0500 |
commit | 5f213f2d419d9549559281fef7d3704ad7614d12 (patch) | |
tree | 6c37c0f4c12d53fcfd0faefa63807de46a280f05 /jstests | |
parent | 3a1b11c960c6fc9bc5c94e4d5cd58f38c78ca55b (diff) | |
download | mongo-5f213f2d419d9549559281fef7d3704ad7614d12.tar.gz |
SERVER-38499: Enforce oplog visibility at the MongoDB layer.
WiredTiger guards against transactions preparing with a timestamp
earlier than the most recent reader. This guarantees no reader may
have seen the wrong version of a document.
The oplog is a special case. Because the oplog does not contain
prepared updates, and oplog readers cannot read from other
collections, it's valid to prepare behind an oplog reader's
timestamp.
However, WiredTiger is not aware the oplog is special. When MongoDB
uses WiredTiger `read_timestamp`s to enforce oplog visibility, there
are cases (specifically, secondary oplog application) where an oplog
reader can be in front of an impending prepare.
There were two strategies available for resolving this. The first is
to artificially hold back how much of the oplog is readable until
nothing can be prepared behind an oplog reader. The second strategy,
which is what this patch does, is to have the MongoDB layer hide
documents that are newer than the visibility point. The mechanism for
calculating and discovering the visibility point is unchanged.
Diffstat (limited to 'jstests')
-rw-r--r-- | jstests/replsets/oplog_visibility.js | 119 |
1 files changed, 119 insertions, 0 deletions
// File: jstests/replsets/oplog_visibility.js (added by this commit as a new file)
/**
 * Test oplog visibility enforcement of primaries and secondaries. This test uses a client to read
 * the oplog while there are concurrent writers. The client copies all the timestamps it sees and
 * verifies a later scan over the range returns the same values.
 *
 * @tags: [requires_document_locking]
 */
(function() {
    "use strict";

    load("jstests/libs/parallelTester.js");  // for ScopedThread.

    // Number of oplog timestamps recorded from each node before they are verified.
    const kNumTimestamps = 1000;
    // Upper bound on documents pulled from a single tailable cursor before issuing a fresh query.
    const kMaxReadsPerCursor = 200;

    /**
     * Tails the oplog on `node` until `kNumTimestamps` timestamps have been recorded, then
     * re-scans the oplog from the first recorded timestamp and asserts that the second scan
     * returns exactly the recorded timestamps, in the same order.
     *
     * @param {Mongo} node - connection to the replica set member whose oplog is examined.
     */
    const testOplog = function(node) {
        const timestamps = [];

        const local = node.getDB("local");
        // Start from the newest oplog entry at the time the test begins on this node.
        let oplogStart =
            local.getCollection("oplog.rs").find().sort({$natural: -1}).limit(-1).next()["ts"];
        jsTestLog({"Node": node.host, "StartTs": oplogStart});

        while (timestamps.length < kNumTimestamps) {
            // Query with $gte to validate continuity. Do not add this first record to the
            // recorded timestamps. Its value was already added in the last cursor.
            const cursor = local.getCollection("oplog.rs")
                               .find({ts: {$gte: oplogStart}})
                               .sort({$natural: 1})
                               .tailable(true)
                               .batchSize(100);
            assert(cursor.hasNext());
            assert.eq(oplogStart, cursor.next()["ts"]);

            // While this method wants to capture 1000 timestamps, the cursor has a batch size
            // of 100 and this loop makes 200 iterations before getting a new cursor from a
            // fresh query. The goal is to exercise getMores, which use different code paths
            // for establishing their oplog reader transactions.
            for (let num = 0; num < kMaxReadsPerCursor && timestamps.length < kNumTimestamps;
                 ++num) {
                try {
                    // When the cursor reports that no document is available, stop using it and
                    // let the outer loop issue a fresh query instead of calling next() on it.
                    if (!cursor.hasNext()) {
                        break;
                    }
                } catch (exc) {
                    // Best effort: a failed getMore abandons this cursor; the outer loop
                    // re-queries from the last timestamp seen. Log it so it is not silent.
                    jsTestLog({"Node": node.host, "CursorError": String(exc)});
                    break;
                }
                const ts = cursor.next()["ts"];
                timestamps.push(ts);
                oplogStart = ts;
            }
        }

        jsTestLog({
            "Verifying": node.host,
            "StartTs": timestamps[0],
            "EndTs": timestamps[timestamps.length - 1]
        });
        oplogStart = timestamps[0];
        const verifyCursor =
            local.getCollection("oplog.rs").find({ts: {$gte: oplogStart}}).sort({$natural: 1});
        // Iterate with a numeric index. `for...in` yields string keys, for which
        // `observedTsIdx + 1` concatenates instead of adding and mislabels the neighbours
        // reported in the failure message.
        for (let observedTsIdx = 0; observedTsIdx < timestamps.length; ++observedTsIdx) {
            const observedTs = timestamps[observedTsIdx];
            assert(verifyCursor.hasNext());
            const actualTs = verifyCursor.next()["ts"];
            assert.eq(actualTs, observedTs, function() {
                // Report the recorded neighbours of the mismatching entry to aid debugging.
                let prev = null;
                let next = null;
                if (observedTsIdx > 0) {
                    prev = timestamps[observedTsIdx - 1];
                }
                if (observedTsIdx + 1 < timestamps.length) {
                    next = timestamps[observedTsIdx + 1];
                }

                return tojson({
                    "Missing": actualTs,
                    "ObservedIdx": observedTsIdx,
                    "PrevObserved": prev,
                    "NextObserved": next
                });
            });
        }
    };

    const replTest = new ReplSetTest({
        name: "oplog_visibility",
        nodes: [{}, {rsConfig: {priority: 0}}, {rsConfig: {priority: 0}}],
        settings: {chainingAllowed: true}
    });
    replTest.startSet();
    replTest.initiate();

    jsTestLog("Enabling `sleepBeforeCommit` failpoint.");
    for (const node of replTest.nodes) {
        assert.commandWorked(node.adminCommand(
            {configureFailPoint: "sleepBeforeCommit", mode: {activationProbability: 0.01}}));
    }

    jsTestLog("Starting concurrent writers.");
    const stopLatch = new CountDownLatch(1);
    const writers = [];
    for (let idx = 0; idx < 2; ++idx) {
        const coll = "coll_" + idx;
        // Each writer inserts documents with increasing _id values into its own collection on
        // the primary until the latch is counted down.
        const writer = new ScopedThread(function(host, coll, stopLatch) {
            const conn = new Mongo(host);
            let id = 0;
            while (stopLatch.getCount() > 0) {
                conn.getDB("test").getCollection(coll).insert({_id: id});
                id++;
            }
        }, replTest.getPrimary().host, coll, stopLatch);

        writer.start();
        writers.push(writer);
    }

    for (const node of replTest.nodes) {
        jsTestLog({"Testing": node.host});
        testOplog(node);
    }

    jsTestLog("Stopping writers.");
    stopLatch.countDown();
    writers.forEach((writer) => {
        writer.join();
    });

    replTest.stopSet();
})();