diff options
author | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2019-02-11 15:43:27 -0500 |
---|---|---|
committer | Max Hirschhorn <max.hirschhorn@mongodb.com> | 2019-02-11 15:43:27 -0500 |
commit | 9db1a8dffe753808bea0d8c47d9fc959eaea9ea0 (patch) | |
tree | 26b5750c5088d745ab1fb93596d010501b0b4cbe /jstests | |
parent | 691ab6da0c38f52f32c1028a8fa7447997ced255 (diff) | |
download | mongo-9db1a8dffe753808bea0d8c47d9fc959eaea9ea0.tar.gz |
SERVER-39169 Add $_internalReadAtClusterTime option to find and dbHash.
The new $_internalReadAtClusterTime option replaces all usages of
running the dbHash command inside of a multi-statement transaction. It
can be used to read from a consistent snapshot in place of specifying an
atClusterTime read concern.
Unlike multi-statement transactions, the new $_internalReadAtClusterTime
option doesn't cause locks to be left on the server after returning a
network response. It instead restores the snapshot to read from as part
of handling the request.
Diffstat (limited to 'jstests')
-rw-r--r-- | jstests/hooks/run_check_repl_dbhash_background.js | 85 | ||||
-rw-r--r-- | jstests/replsets/dbhash_lock_acquisition.js | 95 | ||||
-rw-r--r-- | jstests/replsets/dbhash_read_at_cluster_time.js | 120 | ||||
-rw-r--r-- | jstests/replsets/read_at_cluster_time_outside_transactions.js | 99 |
4 files changed, 313 insertions, 86 deletions
diff --git a/jstests/hooks/run_check_repl_dbhash_background.js b/jstests/hooks/run_check_repl_dbhash_background.js index 8e5b942a2eb..8cd338b2d32 100644 --- a/jstests/hooks/run_check_repl_dbhash_background.js +++ b/jstests/hooks/run_check_repl_dbhash_background.js @@ -3,15 +3,17 @@ * * Unlike run_check_repl_dbhash.js, this version of the hook doesn't require that all operations * have finished replicating, nor does it require that the test has finished running. The dbHash - * command is run inside of a transaction specifying atClusterTime in order for an identical - * snapshot to be used by all members of the replica set. + * command reads at a particular clusterTime in order for an identical snapshot to be used by all + * members of the replica set. * - * The find and getMore commands used to generate the collection diff run as part of the same - * transaction as the dbHash command. This ensures the diagnostics for a dbhash mismatch aren't - * subjected to changes from any operations in flight. + * The find and getMore commands used to generate the collection diff read at the same clusterTime + * as the dbHash command. While this ensures the diagnostics for a dbhash mismatch aren't subjected + * to changes from any operations in flight, it is possible for the collection or an index on the + * collection to be dropped due to no locks being held. * - * If a transient transaction error occurs, then the dbhash check is retried until it succeeds, or - * until it fails with a non-transient error. + * If a transient error occurs, then the dbhash check is retried until it succeeds, or until it + * fails with a non-transient error. The most common case of a transient error is attempting to read + * from a collection after a catalog operation has been performed on the collection or database. */ 'use strict'; @@ -26,7 +28,7 @@ let debugInfo = []; // We turn off printing the JavaScript stacktrace in doassert() to avoid generating an - // overwhelming amount of log messages when handling transient transaction errors. + // overwhelming amount of log messages when handling transient errors. TestData = TestData || {}; TestData.traceExceptions = false; @@ -69,13 +71,8 @@ const kForeverSeconds = 1e9; const dbNames = new Set(); - // We enable the "WTPreserveSnapshotHistoryIndefinitely" failpoint and extend the - // "transactionLifetimeLimitSeconds" server parameter to ensure that - // - // (1) the same snapshot will be available to read at on the primary and secondaries, and - // - // (2) the potentally long-running transaction isn't killed while we are part-way through - // verifying data consistency. + // We enable the "WTPreserveSnapshotHistoryIndefinitely" failpoint to ensure that the same + // snapshot will be available to read at on the primary and secondaries. for (let session of sessions) { const db = session.getDatabase('admin'); @@ -96,19 +93,6 @@ mode: 'off', })); }); - - const res = assert.commandWorked(db.runCommand({ - setParameter: 1, - transactionLifetimeLimitSeconds: kForeverSeconds, - })); - - const transactionLifetimeLimitSecondsOriginal = res.was; - resetFns.push(() => { - assert.commandWorked(db.runCommand({ - setParameter: 1, - transactionLifetimeLimitSeconds: transactionLifetimeLimitSecondsOriginal, - })); - }); } for (let session of sessions) { @@ -124,8 +108,8 @@ }); } - // Transactions cannot be run on the following databases. (The "local" database is also not - // replicated.) + // Transactions cannot be run on the following databases so we don't attempt to read at a + // clusterTime on them either. (The "local" database is also not replicated.) dbNames.delete('admin'); dbNames.delete('config'); dbNames.delete('local'); @@ -133,9 +117,8 @@ const results = []; // The waitForSecondaries() function waits for all secondaries to have applied up to - // 'clusterTime' locally. This ensures that a later atClusterTime read inside a transaction - // doesn't stall as a result of a pending global X lock (e.g. from a dropDatabase command) on - // the primary preventing getMores on the oplog from receiving a response. + // 'clusterTime' locally. This ensures that a later $_internalReadAtClusterTime read doesn't + // fail as a result of the secondary's clusterTime being behind 'clusterTime'. const waitForSecondaries = (clusterTime, signedClusterTime) => { debugInfo.push({"waitForSecondaries": clusterTime, "signedClusterTime": signedClusterTime}); for (let i = 1; i < sessions.length; ++i) { @@ -198,9 +181,10 @@ // isn't multi-versioned. Unlike with ReplSetTest#checkReplicatedDataHashes(), it is possible // for a collection catalog operation (e.g. a drop or rename) to have been applied on the // primary but not yet applied on the secondary. - const checkCollectionHashesForDB = (dbName) => { + const checkCollectionHashesForDB = (dbName, clusterTime) => { const result = []; - const hashes = rst.getHashesUsingSessions(sessions, dbName); + const hashes = + rst.getHashesUsingSessions(sessions, dbName, {readAtClusterTime: clusterTime}); const hashesByUUID = hashes.map((response, i) => { const info = {}; @@ -281,9 +265,7 @@ // ReplSetTest#getCollectionDiffUsingSessions() upon detecting a dbHash mismatch. It is // presumed to still useful to know that a bug exists even if we cannot get more // diagnostics for it. - if ((e.hasOwnProperty('errorLabels') && - e.errorLabels.includes('TransientTransactionError')) || - e.code === ErrorCodes.Interrupted) { + if (e.code === ErrorCodes.Interrupted || e.code === ErrorCodes.SnapshotUnavailable) { hasTransientError = true; return true; } @@ -310,23 +292,15 @@ debugInfo.push({ "node": session.getClient(), "session": session, - "startTransaction": clusterTime + "readAtClusterTime": clusterTime }); - session.startTransaction( - {readConcern: {level: 'snapshot', atClusterTime: clusterTime}}); } hasTransientError = false; try { - result = checkCollectionHashesForDB(dbName); + result = checkCollectionHashesForDB(dbName, clusterTime); } catch (e) { - // We abort each of the transactions started on the nodes if one of them returns an - // error while running the dbHash check. - for (let session of sessions) { - session.abortTransaction_forTesting(); - } - if (isTransientError(e)) { debugInfo.push({"transientError": e}); continue; @@ -335,21 +309,6 @@ jsTestLog(debugInfo); throw e; } - - // We then attempt to commit each of the transactions started on the nodes to confirm - // the data we read was actually majority-committed. If one of them returns an error, - // then we still try to commit the transactions started on subsequent nodes in order to - // clear their transaction state. - for (let session of sessions) { - try { - session.commitTransaction(); - } catch (e) { - if (!isTransientError(e)) { - jsTestLog(debugInfo); - throw e; - } - } - } } while (hasTransientError); for (let mismatchInfo of result) { diff --git a/jstests/replsets/dbhash_lock_acquisition.js b/jstests/replsets/dbhash_lock_acquisition.js index f178e5ce4b5..cf26fb6caea 100644 --- a/jstests/replsets/dbhash_lock_acquisition.js +++ b/jstests/replsets/dbhash_lock_acquisition.js @@ -1,11 +1,14 @@ /** - * Tests that the dbHash command acquires IX mode locks on the global, database, and collection - * resources when running inside a multi-statement transaction. + * Tests that the dbHash command acquires IS mode locks on the global, database, and collection + * resources when reading a timestamp using the $_internalReadAtClusterTime option. + * * @tags: [uses_transactions] */ (function() { "use strict"; + load("jstests/libs/parallelTester.js"); // for ScopedThread + const rst = new ReplSetTest({nodes: 1}); rst.startSet(); rst.initiate(); @@ -16,31 +19,77 @@ const session = primary.startSession({causalConsistency: false}); const sessionDB = session.getDatabase(db.getName()); - function assertTransactionAcquiresIXLocks(txnOptions) { - session.startTransaction(txnOptions); + // We insert a document so the dbHash command has a collection to process. + assert.commandWorked(sessionDB.mycoll.insert({}, {writeConcern: {w: "majority"}})); + const clusterTime = session.getOperationTime(); + + // We then start a transaction in order to be able have a catalog operation queue up behind it. + session.startTransaction(); + assert.commandWorked(sessionDB.mycoll.insert({})); - assert.commandWorked(sessionDB.runCommand({dbHash: 1})); - const ops = db.currentOp({"lsid.id": session.getSessionId().id}).inprog; - assert.eq(1, - ops.length, - () => "Failed to find session in currentOp() output: " + tojson(db.currentOp())); - assert.eq(ops[0].locks, {Global: "w", Database: "w", Collection: "w"}); + const ops = db.currentOp({"lsid.id": session.getSessionId().id}).inprog; + assert.eq(1, + ops.length, + () => "Failed to find session in currentOp() output: " + tojson(db.currentOp())); + assert.eq(ops[0].locks, {Global: "w", Database: "w", Collection: "w"}); - session.abortTransaction_forTesting(); - } + const threadCaptruncCmd = new ScopedThread(function(host) { + try { + const conn = new Mongo(host); + const db = conn.getDB("test"); - // We insert a document so the dbHash command has a collection to process. - assert.commandWorked(db.mycoll.insert({}, {writeConcern: {w: "majority"}})); - - assertTransactionAcquiresIXLocks({}); - assertTransactionAcquiresIXLocks({readConcern: {level: "local"}}); - assertTransactionAcquiresIXLocks({readConcern: {level: "snapshot"}}); - assertTransactionAcquiresIXLocks({ - readConcern: { - level: "snapshot", - atClusterTime: session.getOperationTime(), + // We use the captrunc command as a catalog operation that requires a MODE_X lock on the + // collection. This ensures we aren't having the dbHash command queue up behind it on a + // database-level lock. The collection isn't capped so it'll fail with an + // IllegalOperation error response. + assert.commandFailedWithCode(db.runCommand({captrunc: "mycoll", n: 1}), + ErrorCodes.IllegalOperation); + return {ok: 1}; + } catch (e) { + return {ok: 0, error: e.toString(), stack: e.stack}; } - }); + }, db.getMongo().host); + + threadCaptruncCmd.start(); + + assert.soon(() => { + const ops = db.currentOp({"command.captrunc": "mycoll", waitingForLock: true}).inprog; + return ops.length === 1; + }, () => "Failed to find create collection in currentOp() output: " + tojson(db.currentOp())); + + const threadDBHash = new ScopedThread(function(host, clusterTime) { + try { + const conn = new Mongo(host); + const db = conn.getDB("test"); + assert.commandWorked(db.runCommand({ + dbHash: 1, + $_internalReadAtClusterTime: eval(clusterTime), + })); + return {ok: 1}; + } catch (e) { + return {ok: 0, error: e.toString(), stack: e.stack}; + } + }, db.getMongo().host, tojson(clusterTime)); + + threadDBHash.start(); + + assert.soon(() => { + const ops = db.currentOp({"command.dbHash": 1, waitingForLock: true}).inprog; + if (ops.length === 0) { + return false; + } + // The lock mode for the Global resource is reported as "w" rather than "r" because of the + // mode taken for the ReplicationStateTransitionLock when acquiring the global lock. + assert.eq(ops[0].locks, {Global: "w", Database: "r", Collection: "r"}); + return true; + }, () => "Failed to find create collection in currentOp() output: " + tojson(db.currentOp())); + + session.commitTransaction(); + threadCaptruncCmd.join(); + threadDBHash.join(); + + assert.commandWorked(threadCaptruncCmd.returnData()); + assert.commandWorked(threadDBHash.returnData()); session.endSession(); rst.stopSet(); diff --git a/jstests/replsets/dbhash_read_at_cluster_time.js b/jstests/replsets/dbhash_read_at_cluster_time.js new file mode 100644 index 00000000000..252b5004004 --- /dev/null +++ b/jstests/replsets/dbhash_read_at_cluster_time.js @@ -0,0 +1,120 @@ +/** + * Tests that "$_internalReadAtClusterTime" is supported by the "dbHash" command. + * + * @tags: [uses_transactions] + */ +(function() { + "use strict"; + + const rst = new ReplSetTest({nodes: 2}); + rst.startSet(); + + const replSetConfig = rst.getReplSetConfig(); + replSetConfig.members[1].priority = 0; + rst.initiate(replSetConfig); + + const primary = rst.getPrimary(); + const secondary = rst.getSecondary(); + + const session = primary.startSession({causalConsistency: false}); + const db = session.getDatabase("test"); + let txnNumber = 0; + + // We prevent the replica set from advancing oldest_timestamp. This ensures that the snapshot + // associated with 'clusterTime' is retained for the duration of this test. + rst.nodes.forEach(conn => { + assert.commandWorked(conn.adminCommand({ + configureFailPoint: "WTPreserveSnapshotHistoryIndefinitely", + mode: "alwaysOn", + })); + }); + + // We insert a document and save the md5sum associated with the opTime of that write. + assert.commandWorked(db.mycoll.insert({_id: 1}, {writeConcern: {w: "majority"}})); + const clusterTime = db.getSession().getOperationTime(); + + let res = assert.commandWorked(db.runCommand({ + dbHash: 1, + $_internalReadAtClusterTime: clusterTime, + })); + + const hash1 = {collections: res.collections, md5: res.md5}; + + // We insert another document to ensure the collection's contents have a different md5sum now. + // We use a w=majority write concern to ensure that the insert has also been applied on the + // secondary by the time we go to run the dbHash command later. This avoids a race where the + // replication subsystem could be applying the insert operation when the dbHash command is run + // on the secondary. + assert.commandWorked(db.mycoll.insert({_id: 2}, {writeConcern: {w: "majority"}})); + + // However, using $_internalReadAtClusterTime to read at the opTime of the first insert should + // return the same md5sum as it did originally. + res = assert.commandWorked(db.runCommand({ + dbHash: 1, + $_internalReadAtClusterTime: clusterTime, + })); + + const hash2 = {collections: res.collections, md5: res.md5}; + assert.eq(hash1, hash2, "primary returned different dbhash after second insert"); + + { + const secondarySession = secondary.startSession({causalConsistency: false}); + const secondaryDB = secondarySession.getDatabase("test"); + + // Using $_internalReadAtClusterTime to read at the opTime of the first insert should return + // the same md5sum on the secondary as it did on the primary. + res = assert.commandWorked(secondaryDB.runCommand({ + dbHash: 1, + $_internalReadAtClusterTime: clusterTime, + })); + + const secondaryHash = {collections: res.collections, md5: res.md5}; + assert.eq(hash1, secondaryHash, "primary and secondary have different dbhash"); + } + + { + const otherSession = primary.startSession({causalConsistency: false}); + const otherDB = otherSession.getDatabase("test"); + + // We perform another insert inside a separate transaction to cause a MODE_IX lock to be + // held on the collection. + otherSession.startTransaction(); + assert.commandWorked(otherDB.mycoll.insert({_id: 3})); + + // It should be possible to run the "dbHash" command with "$_internalReadAtClusterTime" + // concurrently. + res = assert.commandWorked(db.runCommand({ + dbHash: 1, + $_internalReadAtClusterTime: clusterTime, + })); + + const hash3 = {collections: res.collections, md5: res.md5}; + assert.eq(hash1, hash3, "primary returned different dbhash after third insert"); + + // However, the "dbHash" command should block behind the transaction if + // "$_internalReadAtClusterTime" wasn't specified. + res = assert.commandFailedWithCode(db.runCommand({dbHash: 1, maxTimeMS: 1000}), + ErrorCodes.MaxTimeMSExpired); + + otherSession.abortTransaction_forTesting(); + otherSession.endSession(); + } + + { + const otherSession = primary.startSession({causalConsistency: false}); + const otherDB = otherSession.getDatabase("test"); + + // We create another collection inside a separate session to modify the collection catalog + // at an opTime later than 'clusterTime'. This prevents further usage of the snapshot + // associated with 'clusterTime' for snapshot reads. + assert.commandWorked(otherDB.runCommand({create: "mycoll2"})); + assert.commandFailedWithCode( + db.runCommand({dbHash: 1, $_internalReadAtClusterTime: clusterTime}), + ErrorCodes.SnapshotUnavailable); + + otherSession.endSession(); + } + + session.endSession(); + rst.stopSet(); +})(); diff --git a/jstests/replsets/read_at_cluster_time_outside_transactions.js b/jstests/replsets/read_at_cluster_time_outside_transactions.js new file mode 100644 index 00000000000..3df9e09976d --- /dev/null +++ b/jstests/replsets/read_at_cluster_time_outside_transactions.js @@ -0,0 +1,99 @@ +/** + * Tests that the "find" and "dbHash" commands support reading at a Timestamp by using the + * $_internalReadAtClusterTime option. + */ +(function() { + "use strict"; + + const rst = new ReplSetTest({nodes: 1}); + rst.startSet(); + rst.initiate(); + + const primary = rst.getPrimary(); + const db = primary.getDB("test"); + + const collName = "read_at_cluster_time_outside_transactions"; + const collection = db[collName]; + + // We prevent the replica set from advancing oldest_timestamp. This ensures that the snapshot + // associated with 'clusterTime' is retained for the duration of this test. + rst.nodes.forEach(conn => { + assert.commandWorked(conn.adminCommand({ + configureFailPoint: "WTPreserveSnapshotHistoryIndefinitely", + mode: "alwaysOn", + })); + }); + + // We insert 3 documents in order to have data to return for both the find and getMore commands + // when using a batch size of 2. We then save the md5sum associated with the opTime of the last + // insert. + assert.commandWorked(collection.insert({_id: 1, comment: "should be seen by find command"})); + assert.commandWorked(collection.insert({_id: 3, comment: "should be seen by find command"})); + assert.commandWorked(collection.insert({_id: 5, comment: "should be seen by getMore command"})); + + const clusterTime = db.getSession().getOperationTime(); + + let res = assert.commandWorked(db.runCommand({dbHash: 1})); + const hashAfterOriginalInserts = {collections: res.collections, md5: res.md5}; + + // The documents with _id=1 and _id=3 should be returned by the find command. + let cursor = collection.find().sort({_id: 1}).batchSize(2); + assert.eq({_id: 1, comment: "should be seen by find command"}, cursor.next()); + assert.eq({_id: 3, comment: "should be seen by find command"}, cursor.next()); + + // We then insert documents with _id=2 and _id=4. The document with _id=2 is positioned behind + // the _id index cursor and won't be returned by the getMore command. However, the document with + // _id=4 is positioned ahead and should end up being returned. + assert.commandWorked( + collection.insert({_id: 2, comment: "should not be seen by getMore command"})); + assert.commandWorked( + collection.insert({_id: 4, comment: "should be seen by non-snapshot getMore command"})); + assert.eq({_id: 4, comment: "should be seen by non-snapshot getMore command"}, cursor.next()); + assert.eq({_id: 5, comment: "should be seen by getMore command"}, cursor.next()); + assert(!cursor.hasNext()); + + // When using the $_internalReadAtClusterTime option with a clusterTime from after the + // original 3 documents were inserted, the document with _id=2 shouldn't be visible to the find + // command because it was inserted afterwards. The same applies to the document with _id=4 and + // the getMore command. + res = collection.runCommand("find", { + batchSize: 2, + sort: {_id: 1}, + $_internalReadAtClusterTime: clusterTime, + }); + + const batchSize = 2; + cursor = new DBCommandCursor(db, res, batchSize); + assert.eq({_id: 1, comment: "should be seen by find command"}, cursor.next()); + assert.eq({_id: 3, comment: "should be seen by find command"}, cursor.next()); + assert.eq({_id: 5, comment: "should be seen by getMore command"}, cursor.next()); + assert(!cursor.hasNext()); + + // Using the $_internalReadAtClusterTime option to read at the opTime of the last of the 3 + // original inserts should return the same md5sum as it did originally. + res = assert.commandWorked(db.runCommand({ + dbHash: 1, + $_internalReadAtClusterTime: clusterTime, + })); + + const hashAtClusterTime = {collections: res.collections, md5: res.md5}; + assert.eq(hashAtClusterTime, hashAfterOriginalInserts); + + // Attempting to read at a clusterTime in the future should return an error. + const futureClusterTime = new Timestamp(clusterTime.getTime() + 1000, 1); + + assert.commandFailedWithCode(collection.runCommand("find", { + batchSize: 2, + sort: {_id: 1}, + $_internalReadAtClusterTime: futureClusterTime, + }), + ErrorCodes.InvalidOptions); + + assert.commandFailedWithCode(db.runCommand({ + dbHash: 1, + $_internalReadAtClusterTime: futureClusterTime, + }), + ErrorCodes.InvalidOptions); + + rst.stopSet(); +})(); |