summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLouis Williams <louis.williams@mongodb.com>2021-05-25 12:25:52 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-06-16 20:07:14 +0000
commit0fc497c9d6a4a30fe0624ce0d7d8f8b7428c684d (patch)
treef439f5d7f0269def5bda67c8f6e3c2ea37ab3339
parent16e278b6d3a5322ffd08bd605b66905035b70fb4 (diff)
downloadmongo-0fc497c9d6a4a30fe0624ce0d7d8f8b7428c684d.tar.gz
SERVER-56839 Index seeks should advance their cursor until a
matching key is found This fixes a bug that can result in a seek on an index returning the wrong key if it is concurrent with a recently-committed prepared transaction on an adjacent record. (cherry picked from commit b18749767fc53a9b5822312a063afb26883a774a) (cherry picked from commit cc7ffaa7a7e7462037a40fffa833987e7aa644c5)
-rw-r--r--jstests/noPassthrough/prepare_read_cursor_out_of_bounds.js57
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp49
2 files changed, 103 insertions, 3 deletions
diff --git a/jstests/noPassthrough/prepare_read_cursor_out_of_bounds.js b/jstests/noPassthrough/prepare_read_cursor_out_of_bounds.js
new file mode 100644
index 00000000000..cbdfa6e5128
--- /dev/null
+++ b/jstests/noPassthrough/prepare_read_cursor_out_of_bounds.js
@@ -0,0 +1,57 @@
+/**
+ * Reproduces a bug where searching for a key returns an adjacent key in a recently-committed
+ * prepared transaction. See SERVER-56839.
+ *
+ * Create an index with a single key, "a". Insert a new key for "b" in a prepared transaction. This
+ * creates a prepared, but uncommitted index entry before the key we want to search for, "c", which
+ * doesn't exist. We will query (search_near internally) for "c" and the cursor will initially land
+ * on "a". This is less than they key were searching for, so the cursor is advanced to the next key,
+ * expecting to land on something greater than or equal to "c". Before this happens, the prepared
+ * transaction commits, making "b" visible. Ensure that the cursor does not return "b" even though
+ * we queried for "c".
+ *
+ * @tags: [
+ * requires_replication,
+ * uses_prepare_transaction,
+ * ]
+ */
+(function() {
+"use strict";
+load("jstests/core/txns/libs/prepare_helpers.js");
+load("jstests/libs/fail_point_util.js");
+load('jstests/libs/parallel_shell_helpers.js');
+
+const replTest = new ReplSetTest({nodes: 1});
+replTest.startSet();
+replTest.initiate();
+
+const primary = replTest.getPrimary();
+const dbName = 'test';
+const collName = 'coll';
+
+const db = primary.getDB(dbName);
+assert.commandWorked(db[collName].createIndex({x: 1}));
+assert.commandWorked(db[collName].insert({x: 'a'}));
+
+const session = primary.startSession({causalConsistency: false});
+const sessionDB = session.getDatabase(dbName);
+const sessionColl = sessionDB.getCollection(collName);
+session.startTransaction();
+sessionColl.insert({x: 'b'});
+const prepareTimestamp = PrepareHelpers.prepareTransaction(session);
+
+const failpoint = configureFailPoint(primary, "WTIndexPauseAfterSearchNear", {indexName: 'x_1'});
+
+// After the query on 'c' starts, we commit the transaction and advance the cursor. Expect that this
+// finds nothing.
+const awaitShell = startParallelShell(function() {
+ assert.eq(null, db.coll.findOne({x: 'c'}));
+}, primary.port);
+
+failpoint.wait();
+assert.commandWorked(PrepareHelpers.commitTransaction(session, prepareTimestamp));
+failpoint.off();
+awaitShell();
+
+replTest.stopSet();
+})();
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
index bb5351f6dac..f39007f4970 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
@@ -80,6 +80,7 @@ namespace {
MONGO_FAIL_POINT_DEFINE(WTCompactIndexEBUSY);
MONGO_FAIL_POINT_DEFINE(WTEmulateOutOfOrderNextIndexKey);
+MONGO_FAIL_POINT_DEFINE(WTIndexPauseAfterSearchNear);
using std::string;
using std::vector;
@@ -1092,8 +1093,8 @@ protected:
WT_CURSOR* c = _cursor->get();
int cmp = -1;
- const WiredTigerItem keyItem(query.getBuffer(), query.getSize());
- setKey(c, keyItem.Get());
+ const WiredTigerItem searchKey(query.getBuffer(), query.getSize());
+ setKey(c, searchKey.Get());
int ret = wiredTigerPrepareConflictRetry(_opCtx, [&] { return c->search_near(c, &cmp); });
if (ret == WT_NOTFOUND) {
@@ -1106,14 +1107,56 @@ protected:
LOGV2_TRACE_CURSOR(20089, "cmp: {cmp}", "cmp"_attr = cmp);
+ WTIndexPauseAfterSearchNear.executeIf(
+ [](const BSONObj&) {
+ LOGV2(5683901, "hanging after search_near");
+ WTIndexPauseAfterSearchNear.pauseWhileSet();
+ },
+ [&](const BSONObj& data) { return data["indexName"].str() == _idx.indexName(); });
+
if (cmp == 0) {
// Found it!
return true;
}
// Make sure we land on a matching key (after/before for forward/reverse).
- if (_forward ? cmp < 0 : cmp > 0) {
+ // If this operation is ignoring prepared updates and search_near() lands on a key that
+ // compares lower than the search key (for a forward cursor), calling next() is not
+ // guaranteed to return a key that compares greater than the search key. This is because
+ // ignoring prepare conflicts does not provide snapshot isolation and the call to next()
+ // may land on a newly-committed prepared entry. We must advance our cursor until we find a
+ // key that compares greater than the search key. The same principle applies to reverse
+ // cursors. See SERVER-56839.
+ const bool enforcingPrepareConflicts =
+ _opCtx->recoveryUnit()->getPrepareConflictBehavior() ==
+ PrepareConflictBehavior::kEnforce;
+ WT_ITEM curKey;
+ while (_forward ? cmp < 0 : cmp > 0) {
advanceWTCursor();
+ if (_cursorAtEof) {
+ break;
+ }
+
+ if (!kDebugBuild && enforcingPrepareConflicts) {
+ break;
+ }
+
+ getKey(c, &curKey);
+ cmp = std::memcmp(curKey.data, searchKey.data, std::min(searchKey.size, curKey.size));
+
+ LOGV2_TRACE_CURSOR(5683900, "cmp after advance: {cmp}", "cmp"_attr = cmp);
+
+ // We do not expect any exact matches or matches of prefixes by comparing keys of
+ // different lengths. Callers either seek using keys with discriminators that always
+ // compare unequally, or in the case of restoring a cursor, perform exact searches. In
+ // the case of an exact search, we will have returned earlier.
+ dassert(cmp);
+
+ if (enforcingPrepareConflicts) {
+ // If we are enforcing prepare conflicts, calling next() or prev() must always give
+ // us a key that compares, respectively, greater than or less than our search key.
+ dassert(_forward ? cmp > 0 : cmp < 0);
+ }
}
return false;