From 14b9051a791865503f3b101a62c0903f5c15a4a8 Mon Sep 17 00:00:00 2001 From: Louis Williams Date: Fri, 4 Feb 2022 15:58:46 +0000 Subject: SERVER-62650 Limit cache wait time when initializing RecordIds --- ...mmit_and_abort_large_unprepared_transactions.js | 27 +++++++++++++++------- .../storage/wiredtiger/wiredtiger_record_store.cpp | 27 ++++++++++++++++------ 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/jstests/core/txns/commit_and_abort_large_unprepared_transactions.js b/jstests/core/txns/commit_and_abort_large_unprepared_transactions.js index feb09ef4656..aff44fe900d 100644 --- a/jstests/core/txns/commit_and_abort_large_unprepared_transactions.js +++ b/jstests/core/txns/commit_and_abort_large_unprepared_transactions.js @@ -27,14 +27,25 @@ const sessionDB = session.getDatabase(dbName); const sessionColl = sessionDB.getCollection(collName); // Test committing an unprepared large transaction with two 10MB inserts. -let doc1 = createLargeDocument(1); -let doc2 = createLargeDocument(2); -session.startTransaction(); -assert.commandWorked(sessionColl.insert(doc1)); -assert.commandWorked(sessionColl.insert(doc2)); - -assert.commandWorked(session.commitTransaction_forTesting()); -assert.sameMembers(sessionColl.find().toArray(), [doc1, doc2]); +try { + let doc1 = createLargeDocument(1); + let doc2 = createLargeDocument(2); + + session.startTransaction(); + assert.commandWorked(sessionColl.insert(doc1)); + assert.commandWorked(sessionColl.insert(doc2)); + assert.commandWorked(session.commitTransaction_forTesting()); + assert.sameMembers(sessionColl.find().toArray(), [doc1, doc2]); +} catch (e) { + // It may be possible for this test to run in a passthrough where such a large transaction fills + // up the cache and cannot commit. The transaction will be rolled-back with a WriteConflict as a + // result. 
+ if (e.code === ErrorCodes.WriteConflict && e.errmsg.startsWith("Cache full")) { + jsTestLog("Ignoring WriteConflict due to large transaction's size"); + } else { + throw e; + } +} // Test aborting an unprepared large transaction with two 10MB inserts. let doc3 = createLargeDocument(3); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp index d743d8b2bdb..2c6e9e6f29f 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp @@ -1911,18 +1911,31 @@ void WiredTigerRecordStore::_initNextIdIfNeeded(OperationContext* opCtx) { // Initialize the highest seen RecordId in a session without a read timestamp because that is // required by the largest_key API. - WiredTigerSessionCache* cache = WiredTigerRecoveryUnit::get(opCtx)->getSessionCache(); - auto sessRaii = cache->getSession(); - auto cachedCursor = sessRaii->getCachedCursor(_tableId, ""); - auto cursor = cachedCursor ? cachedCursor : sessRaii->getNewCursor(_uri); - ON_BLOCK_EXIT([&] { sessRaii->releaseCursor(_tableId, cursor, ""); }); + WiredTigerSession sessRaii(_kvEngine->getConnection()); + + // We must limit the amount of time spent blocked on cache eviction to avoid a deadlock with + // ourselves. The calling operation may have a session open that has written a large amount of + // data, and by creating a new session, we are preventing WT from being able to roll back that + // transaction to free up cache space. If we do block on cache eviction here, we must consider + // that the other session owned by this thread may be the one that needs to be rolled back. If + // this does time out, we will receive a WT_CACHE_FULL and throw an error. 
+ auto wtSession = sessRaii.getSession(); + invariantWTOK(wtSession->reconfigure(wtSession, "cache_max_wait_ms=1000"), wtSession); + + auto cursor = sessRaii.getNewCursor(_uri); // Find the largest RecordId in the table and add 1 to generate our next RecordId. The // largest_key API returns the largest key in the table regardless of visibility. This ensures // we don't re-use RecordIds that are not visible. int ret = cursor->largest_key(cursor); - if (ret != WT_NOTFOUND) { - invariantWTOK(ret, cursor->session); + if (ret == WT_CACHE_FULL) { + // Force the caller to roll back its transaction if we can't make progress with eviction. + // TODO (SERVER-60839): Convert this to a different error code that is distinguishable from + // a true write conflict. + throw WriteConflictException( + fmt::format("Cache full while performing initial write to '{}'", _ns)); + } else if (ret != WT_NOTFOUND) { + invariantWTOK(ret, wtSession); auto recordId = getKey(cursor); nextId = recordId.getLong() + 1; } -- cgit v1.2.1