summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMax Hirschhorn <max.hirschhorn@mongodb.com>2018-05-17 20:59:18 -0400
committerMax Hirschhorn <max.hirschhorn@mongodb.com>2018-05-17 20:59:18 -0400
commit25b0e6f7d22de88faaa7e223195992e995acdff4 (patch)
tree0e161ecf41af78dd5b4553549ea7a9df832ce242
parent05651d31cad6fa886a436fda597234ceebf52dfd (diff)
downloadmongo-25b0e6f7d22de88faaa7e223195992e995acdff4.tar.gz
SERVER-34778 Add support for dbHash command inside multi-stmt txn.
The dbHash command is only allowed inside of a multi-statement transaction when test commands are enabled. Also introduces a WTPreserveSnapshotHistoryIndefinitely failpoint to skip setting the oldest timestamp.
-rw-r--r--jstests/replsets/dbhash_at_cluster_time.js124
-rw-r--r--src/mongo/db/commands/dbhash.cpp45
-rw-r--r--src/mongo/db/service_entry_point_common.cpp1
-rw-r--r--src/mongo/db/session.cpp8
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp10
5 files changed, 185 insertions, 3 deletions
diff --git a/jstests/replsets/dbhash_at_cluster_time.js b/jstests/replsets/dbhash_at_cluster_time.js
new file mode 100644
index 00000000000..b3b57965258
--- /dev/null
+++ b/jstests/replsets/dbhash_at_cluster_time.js
@@ -0,0 +1,124 @@
+/**
+ * Tests that "atClusterTime" is supported by the "dbHash" command.
+ */
+(function() {
+ "use strict";
+
+ const rst = new ReplSetTest({nodes: 2});
+ rst.startSet();
+
+ const replSetConfig = rst.getReplSetConfig();
+ replSetConfig.members[1].priority = 0;
+ rst.initiate(replSetConfig);
+
+ const primary = rst.getPrimary();
+ const secondary = rst.getSecondary();
+
+ const session = primary.startSession({causalConsistency: false});
+ const db = session.getDatabase("test");
+ let txnNumber = 0;
+
+ if (!db.serverStatus().storageEngine.supportsSnapshotReadConcern) {
+ rst.stopSet();
+ return;
+ }
+
+ // We force 'secondary' to sync from 'primary' using the "forceSyncSourceCandidate" failpoint to
+ // ensure that an intermittent connectivity issue doesn't lead to the secondary not advancing
+ // its belief of the majority commit point. This avoids any complications that would arise due
+ // to SERVER-33248.
+ assert.commandWorked(secondary.adminCommand({
+ configureFailPoint: "forceSyncSourceCandidate",
+ mode: "alwaysOn",
+ data: {hostAndPort: primary.host}
+ }));
+ rst.awaitSyncSource(secondary, primary);
+
+ // We also prevent all nodes in the replica set from advancing oldest_timestamp. This ensures
+ // that the snapshot associated with 'clusterTime' is retained for the duration of this test.
+ rst.nodes.forEach(conn => {
+ assert.commandWorked(conn.adminCommand({
+ configureFailPoint: "WTPreserveSnapshotHistoryIndefinitely",
+ mode: "alwaysOn",
+ }));
+ });
+
+ // We insert a document and record the opTime of that write; the md5sum at that opTime is compared later.
+ assert.commandWorked(db.mycoll.insert({_id: 1}, {writeConcern: {w: "majority"}}));
+ const clusterTime = db.getSession().getOperationTime();
+
+ session.startTransaction({readConcern: {level: "snapshot", atClusterTime: clusterTime}});
+ let res = assert.commandWorked(db.runCommand({dbHash: 1}));
+ session.commitTransaction();
+ const hash1 = {collections: res.collections, md5: res.md5};
+
+ // We insert another document to ensure the collection's contents have a different md5sum now.
+ assert.commandWorked(db.mycoll.insert({_id: 2}));
+
+ // However, using atClusterTime to read at the opTime of the first insert should return the same
+ // md5sum as it did originally.
+ session.startTransaction({readConcern: {level: "snapshot", atClusterTime: clusterTime}});
+ res = assert.commandWorked(db.runCommand({dbHash: 1}));
+ session.commitTransaction();
+ const hash2 = {collections: res.collections, md5: res.md5};
+ assert.eq(hash1, hash2, "primary returned different dbhash after second insert");
+
+ {
+ const secondarySession = secondary.startSession({causalConsistency: false});
+ const secondaryDB = secondarySession.getDatabase("test");
+
+ // Using atClusterTime to read at the opTime of the first insert should return the same
+ // md5sum on the secondary as it did on the primary.
+ secondarySession.startTransaction(
+ {readConcern: {level: "snapshot", atClusterTime: clusterTime}});
+ res = assert.commandWorked(secondaryDB.runCommand({dbHash: 1}));
+ secondarySession.commitTransaction();
+ const secondaryHash = {collections: res.collections, md5: res.md5};
+ assert.eq(hash1, secondaryHash, "primary and secondary have different dbhash");
+
+ secondarySession.endSession();
+ }
+
+ {
+ const otherSession = primary.startSession({causalConsistency: false});
+ const otherDB = otherSession.getDatabase("test");
+
+ // We perform another insert inside a separate transaction to cause a MODE_IX lock to be
+ // held on the collection.
+ otherSession.startTransaction();
+ assert.commandWorked(otherDB.mycoll.insert({_id: 3}));
+
+ // It should be possible to run "dbHash" with "atClusterTime" while the other transaction is open.
+ session.startTransaction({readConcern: {level: "snapshot", atClusterTime: clusterTime}});
+ res = assert.commandWorked(db.runCommand({dbHash: 1}));
+ session.commitTransaction();
+ const hash3 = {collections: res.collections, md5: res.md5};
+ assert.eq(hash1, hash3, "primary returned different dbhash after third insert");
+
+ // However, the "dbHash" command should block behind the transaction if "atClusterTime"
+ // wasn't specified.
+ res = assert.commandFailedWithCode(db.runCommand({dbHash: 1, maxTimeMS: 1000}),
+ ErrorCodes.ExceededTimeLimit);
+
+ otherSession.abortTransaction();
+ otherSession.endSession();
+ }
+
+ {
+ const otherSession = primary.startSession({causalConsistency: false});
+ const otherDB = otherSession.getDatabase("test");
+
+ // We create another collection inside a separate session to modify the collection catalog
+ // at an opTime later than 'clusterTime'. This prevents further usage of the snapshot
+ // associated with 'clusterTime' for snapshot reads.
+ assert.commandWorked(otherDB.runCommand({create: "mycoll2"}));
+ session.startTransaction({readConcern: {level: "snapshot", atClusterTime: clusterTime}});
+ assert.commandFailedWithCode(db.runCommand({dbHash: 1}), ErrorCodes.SnapshotUnavailable);
+ session.abortTransaction();
+
+ otherSession.endSession();
+ }
+
+ session.endSession();
+ rst.stopSet();
+})();
diff --git a/src/mongo/db/commands/dbhash.cpp b/src/mongo/db/commands/dbhash.cpp
index c7aa1c64e14..29a7e1fb129 100644
--- a/src/mongo/db/commands/dbhash.cpp
+++ b/src/mongo/db/commands/dbhash.cpp
@@ -32,6 +32,7 @@
#include "mongo/platform/basic.h"
+#include <boost/optional.hpp>
#include <map>
#include <string>
@@ -62,6 +63,17 @@ public:
return false;
}
+ ReadWriteType getReadWriteType() const override {
+ return ReadWriteType::kRead;
+ }
+
+ bool supportsReadConcern(const std::string& dbName,
+ const BSONObj& cmdObj,
+ repl::ReadConcernLevel level) const override {
+ return level == repl::ReadConcernLevel::kLocalReadConcern ||
+ level == repl::ReadConcernLevel::kSnapshotReadConcern;
+ }
+
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
return AllowedOnSecondary::kAlways;
}
@@ -101,7 +113,14 @@ public:
// We lock the entire database in S-mode in order to ensure that the contents will not
// change for the snapshot.
- AutoGetDb autoDb(opCtx, ns, MODE_S);
+ auto lockMode = LockMode::MODE_S;
+ if (repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime()) {
+ // However, if we are using "atClusterTime" to read from a consistent snapshot, then we
+ // only need to lock the database in intent mode to ensure that none of the collections
+ // get dropped.
+ lockMode = getLockModeForQuery(opCtx);
+ }
+ AutoGetDb autoDb(opCtx, ns, lockMode);
Database* db = autoDb.getDb();
std::list<std::string> colls;
if (db) {
@@ -177,6 +196,30 @@ private:
if (!collection)
return "";
+ boost::optional<Lock::CollectionLock> collLock;
+ if (repl::ReadConcernArgs::get(opCtx).getArgsAtClusterTime()) {
+ // When using "atClusterTime", we are only holding the database lock in intent mode. We
+ // need to also acquire the collection lock in intent mode to ensure reading from the
+ // consistent snapshot doesn't overlap with any catalog operations on the collection.
+ invariant(opCtx->lockState()->isDbLockedForMode(db->name(), MODE_IS));
+ collLock.emplace(opCtx->lockState(), fullCollectionName, getLockModeForQuery(opCtx));
+
+ auto minSnapshot = collection->getMinimumVisibleSnapshot();
+ auto mySnapshot = opCtx->recoveryUnit()->getPointInTimeReadTimestamp();
+ invariant(mySnapshot);
+
+ uassert(ErrorCodes::SnapshotUnavailable,
+ str::stream() << "Unable to read from a snapshot due to pending collection"
+ " catalog changes; please retry the operation. Snapshot"
+ " timestamp is "
+ << mySnapshot->toString()
+ << ". Collection minimum timestamp is "
+ << minSnapshot->toString(),
+ !minSnapshot || *mySnapshot >= *minSnapshot);
+ } else {
+ invariant(opCtx->lockState()->isDbLockedForMode(db->name(), MODE_S));
+ }
+
IndexDescriptor* desc = collection->getIndexCatalog()->findIdIndex(opCtx);
std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> exec;
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index 90e0b686c76..05d6f3ef09d 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -111,6 +111,7 @@ const StringMap<int> sessionCheckoutWhitelist = {{"abortTransaction", 1},
{"applyOps", 1},
{"commitTransaction", 1},
{"count", 1},
+ {"dbHash", 1},
{"delete", 1},
{"distinct", 1},
{"doTxn", 1},
diff --git a/src/mongo/db/session.cpp b/src/mongo/db/session.cpp
index 75afa18586d..22683a3ab59 100644
--- a/src/mongo/db/session.cpp
+++ b/src/mongo/db/session.cpp
@@ -110,6 +110,10 @@ const StringMap<int> txnCmdWhitelist = {{"abortTransaction", 1},
{"prepareTransaction", 1},
{"update", 1}};
+// The command names that are allowed in a multi-document transaction only when test commands are
+// enabled.
+const StringMap<int> txnCmdForTestingWhitelist = {{"dbHash", 1}};
+
// The commands that can be run on the 'admin' database in multi-document transactions.
const StringMap<int> txnAdminCommands = {
{"abortTransaction", 1}, {"commitTransaction", 1}, {"doTxn", 1}, {"prepareTransaction", 1}};
@@ -347,7 +351,9 @@ void Session::beginOrContinueTxn(OperationContext* opCtx,
uassert(50767,
str::stream() << "Cannot run '" << cmdName << "' in a multi-document transaction.",
- !autocommit || txnCmdWhitelist.find(cmdName) != txnCmdWhitelist.cend());
+ !autocommit || txnCmdWhitelist.find(cmdName) != txnCmdWhitelist.cend() ||
+ (getTestCommandsEnabled() &&
+ txnCmdForTestingWhitelist.find(cmdName) != txnCmdForTestingWhitelist.cend()));
uassert(50844,
str::stream() << "Cannot run command against the '" << dbName
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index 2596a4f5cb5..25685dc5024 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -1104,6 +1104,12 @@ void WiredTigerKVEngine::setOldestTimestamp(Timestamp oldestTimestamp) {
_setOldestTimestamp(oldestTimestamp, doForce);
}
+namespace {
+
+MONGO_FP_DECLARE(WTPreserveSnapshotHistoryIndefinitely);
+
+} // namespace
+
void WiredTigerKVEngine::setStableTimestamp(Timestamp stableTimestamp) {
if (stableTimestamp.isNull()) {
return;
@@ -1144,7 +1150,9 @@ void WiredTigerKVEngine::setStableTimestamp(Timestamp stableTimestamp) {
// Communicate to WiredTiger that it can clean up timestamp data earlier than the timestamp
// provided. No future queries will need point-in-time reads at a timestamp prior to the one
// provided here.
- _setOldestTimestamp(stableTimestamp);
+ if (!MONGO_FAIL_POINT(WTPreserveSnapshotHistoryIndefinitely)) {
+ _setOldestTimestamp(stableTimestamp);
+ }
}
void WiredTigerKVEngine::_setOldestTimestamp(Timestamp oldestTimestamp, bool force) {